Merged
2 changes: 1 addition & 1 deletion clang/test/Misc/loop-opt-setup.c
@@ -15,7 +15,7 @@ int foo(void) {
// CHECK-NOT: br i1

void Helper(void) {
const int *nodes[5];
const int *nodes[5] = {0};
int num_active = 5;

while (num_active)
109 changes: 87 additions & 22 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3277,12 +3277,13 @@ static bool isRemovableWrite(CallBase &CB, Value *UsedV,
return Dest && Dest->Ptr == UsedV;
}

static bool isAllocSiteRemovable(Instruction *AI,
SmallVectorImpl<WeakTrackingVH> &Users,
const TargetLibraryInfo &TLI) {
static std::optional<ModRefInfo>
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
const TargetLibraryInfo &TLI, bool KnowInit) {
SmallVector<Instruction*, 4> Worklist;
const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
Worklist.push_back(AI);
ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;

do {
Instruction *PI = Worklist.pop_back_val();
@@ -3291,7 +3292,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
switch (I->getOpcode()) {
default:
// Give up the moment we see something we can't handle.
return false;
return std::nullopt;

case Instruction::AddrSpaceCast:
case Instruction::BitCast:
@@ -3306,10 +3307,10 @@ static bool isAllocSiteRemovable(Instruction *AI,
// We also fold comparisons in some conditions provided the alloc has
// not escaped (see isNeverEqualToUnescapedAlloc).
if (!ICI->isEquality())
return false;
return std::nullopt;
unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
return false;
return std::nullopt;

// Do not fold compares to aligned_alloc calls, as they may have to
// return null in case the required alignment cannot be satisfied,
@@ -3329,7 +3330,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
!AlignmentAndSizeKnownValid(CB))
return false;
return std::nullopt;
Users.emplace_back(I);
continue;
}
@@ -3339,14 +3340,21 @@ static bool isAllocSiteRemovable(Instruction *AI,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default:
return false;
return std::nullopt;

case Intrinsic::memmove:
case Intrinsic::memcpy:
case Intrinsic::memset: {
MemIntrinsic *MI = cast<MemIntrinsic>(II);
if (MI->isVolatile() || MI->getRawDest() != PI)
return false;
if (MI->isVolatile())
return std::nullopt;
// Note: this could also be ModRef, but we can still interpret that
// as just Mod in that case.
ModRefInfo NewAccess =
MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
return std::nullopt;
Access |= NewAccess;
[[fallthrough]];
}
case Intrinsic::assume:
@@ -3365,11 +3373,6 @@ static bool isAllocSiteRemovable(Instruction *AI,
}
}

if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
Users.emplace_back(I);
continue;
}

if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
getAllocationFamily(I, &TLI) == Family) {
Users.emplace_back(I);
@@ -3383,20 +3386,43 @@ static bool isAllocSiteRemovable(Instruction *AI,
continue;
}

return false;
if (!isRefSet(Access) &&
isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
Access |= ModRefInfo::Mod;
Users.emplace_back(I);
continue;
}

return std::nullopt;

case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(I);
if (SI->isVolatile() || SI->getPointerOperand() != PI)
return false;
return std::nullopt;
if (isRefSet(Access))
return std::nullopt;
Access |= ModRefInfo::Mod;
Users.emplace_back(I);
continue;
}

case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(I);
if (LI->isVolatile() || LI->getPointerOperand() != PI)
return std::nullopt;
if (isModSet(Access))
return std::nullopt;
Access |= ModRefInfo::Ref;
Users.emplace_back(I);
continue;
}
}
llvm_unreachable("missing a return?");
}
} while (!Worklist.empty());
return true;

assert(Access != ModRefInfo::ModRef);
return Access;
}
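(An explanatory aside, not part of the patch.) The invariant the walk above maintains: every user of the allocation may write it, or read its known initial contents, but never both, since a read that can observe an earlier arbitrary write cannot be folded from the initial value alone. A minimal LLVM IR sketch of both sides of that rule; the function names are hypothetical and the "becomes" comments assume the rest of this patch:

  declare ptr @calloc(i64, i64)
  declare void @free(ptr)

  ; Removable: Access only ever reaches Ref, and calloc's initial value is
  ; known zero, so the load folds and the allocation disappears.
  define i32 @only_read() {
    %p = call ptr @calloc(i64 1, i64 4)
    %v = load i32, ptr %p          ; Access |= Ref
    call void @free(ptr %p)
    ret i32 %v                     ; becomes ret i32 0
  }

  ; Not removable by this walk: the store sets Mod, so the load's
  ; isModSet(Access) check (or the store's isRefSet check, depending on
  ; visitation order) returns std::nullopt.
  define i32 @write_then_read(i32 %x) {
    %p = call ptr @calloc(i64 1, i64 4)
    store i32 %x, ptr %p           ; Access |= Mod
    %v = load i32, ptr %p          ; Ref requested while Mod is set: bail
    call void @free(ptr %p)
    ret i32 %v
  }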

Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
@@ -3424,10 +3450,31 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
}

if (isAllocSiteRemovable(&MI, Users, TLI)) {
// Determine what getInitialValueOfAllocation would return without actually
// allocating the result.
bool KnowInitUndef = false;
bool KnowInitZero = false;
Constant *Init =
getInitialValueOfAllocation(&MI, &TLI, Type::getInt8Ty(MI.getContext()));
if (Init) {
if (isa<UndefValue>(Init))
KnowInitUndef = true;
else if (Init->isNullValue())
KnowInitZero = true;
}
// The various sanitizers don't actually return undef memory, but rather
// memory initialized with special forms of runtime poison.
auto &F = *MI.getFunction();
if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
F.hasFnAttribute(Attribute::SanitizeAddress))
KnowInitUndef = false;

auto Removable =
isAllocSiteRemovable(&MI, Users, TLI, KnowInitZero | KnowInitUndef);
if (Removable) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
// Lowering all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
// Lowering all @llvm.objectsize and MTI calls first because they may use
// a bitcast/GEP of the alloca we are removing.
if (!Users[i])
continue;

@@ -3444,6 +3491,17 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
}
if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
if (KnowInitZero && isRefSet(*Removable)) {
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(MTI);
auto *M = Builder.CreateMemSet(
MTI->getRawDest(),
ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
MTI->getLength(), MTI->getDestAlign());
M->copyMetadata(*MTI);
}
}
}
}
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
@@ -3466,7 +3524,14 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
} else {
// Casts, GEP, or anything else: we're about to delete this instruction,
// so it can not have any valid uses.
replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
Constant *Replace;
if (isa<LoadInst>(I)) {
assert(KnowInitZero || KnowInitUndef);
Replace = KnowInitUndef ? UndefValue::get(I->getType())
: Constant::getNullValue(I->getType());
} else
Replace = PoisonValue::get(I->getType());
replaceInstUsesWith(*I, Replace);
}
eraseInstFromFunction(*I);
}
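End to end, the MTI handling added above enables the following rewrite. A hedged sketch, with a hypothetical function and an assumed output shape: the source buffer is known zero-initialized and only ever read, so visitAllocSite can replace the copy with a zero memset of the destination and drop the allocation entirely.

  declare ptr @calloc(i64, i64)
  declare void @free(ptr)
  declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

  define void @copy_zeros(ptr %dst) {
    %src = call ptr @calloc(i64 1, i64 16)   ; known zero init, only read
    call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)
    call void @free(ptr %src)
    ret void
  }

  ; Expected result (assumed): the calloc/free pair is gone and the copy has
  ; become
  ;   call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 16, i1 false)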
74 changes: 64 additions & 10 deletions llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -364,23 +364,77 @@ define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {

define void @simplify_before_foldAndOfICmps(ptr %p) {
; CHECK-LABEL: @simplify_before_foldAndOfICmps(
; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2
(Contributor review comment on the load above: Should probably replace this load with an argument to retain test behavior?)

; CHECK-NEXT: store i1 true, ptr [[P:%.*]], align 1
; CHECK-NEXT: store ptr null, ptr [[P]], align 8
; CHECK-NEXT: ret void
;
%A8 = alloca i16
%L7 = load i16, ptr %A8
%G21 = getelementptr i16, ptr %A8, i8 -1
%B11 = udiv i16 %L7, -1
%G4 = getelementptr i16, ptr %A8, i16 %B11
%L2 = load i16, ptr %G4
%L = load i16, ptr %G4
%B23 = mul i16 %B11, %B11
%L4 = load i16, ptr %A8
%B21 = sdiv i16 %L7, %L4
%B7 = sub i16 0, %B21
%B18 = mul i16 %B23, %B7
%C10 = icmp ugt i16 %L, %B11
%B20 = and i16 %L7, %L2
%B1 = mul i1 %C10, true
%C5 = icmp sle i16 %B21, %L
%C11 = icmp ule i16 %B21, %L
%C7 = icmp slt i16 %B20, 0
%B29 = srem i16 %L4, %B18
%B15 = add i1 %C7, %C10
%B19 = add i1 %C11, %B15
%C6 = icmp sge i1 %C11, %B19
%B33 = or i16 %B29, %L4
%C13 = icmp uge i1 %C5, %B1
%C3 = icmp ult i1 %C13, %C6
store i16 undef, ptr %G21
%C18 = icmp ule i1 %C10, %C7
%G26 = getelementptr i1, ptr null, i1 %C3
store i16 %B33, ptr %p
store i1 %C18, ptr %p
store ptr %G26, ptr %p
ret void
}
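Why this test had to change: with the patch, a load from a never-written alloca now folds through the known undef initial value, so the original function collapses almost entirely. A minimal sketch of that fold (hypothetical function name):

  define i16 @load_uninit() {
    %a = alloca i16
    %v = load i16, ptr %a        ; initial value of an alloca is undef
    ret i16 %v                   ; becomes ret i16 undef
  }

Moving the buffer into a function argument, as the review comment above suggests and as the second variant below does, keeps the loads opaque and preserves the original test coverage.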

define void @simplify_before_foldAndOfICmps2(ptr %p, ptr %A8) "instcombine-no-verify-fixpoint" {
; CHECK-LABEL: @simplify_before_foldAndOfICmps2(
; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1
; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16
; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]]
; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0
; CHECK-NEXT: [[C3:%.*]] = and i1 [[C7]], [[C10]]
; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[C10]], true
; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[C3]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64
; CHECK-NEXT: [[G4:%.*]] = getelementptr i16, ptr [[A8]], i64 [[TMP2]]
; CHECK-NEXT: [[L2:%.*]] = load i16, ptr [[G4]], align 2
; CHECK-NEXT: [[L4:%.*]] = load i16, ptr [[A8]], align 2
; CHECK-NEXT: [[B21:%.*]] = sdiv i16 [[L7]], [[L4]]
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i16 [[B21]], i16 0
; CHECK-NEXT: [[B18:%.*]] = sub i16 0, [[TMP5]]
; CHECK-NEXT: [[C11:%.*]] = icmp ugt i16 [[L2]], [[B11]]
; CHECK-NEXT: [[B20:%.*]] = and i16 [[L7]], [[L2]]
; CHECK-NEXT: [[C5:%.*]] = icmp sgt i16 [[B21]], [[L2]]
; CHECK-NEXT: [[C12:%.*]] = icmp ule i16 [[B21]], [[L2]]
; CHECK-NEXT: [[C10:%.*]] = icmp slt i16 [[B20]], 0
; CHECK-NEXT: [[B29:%.*]] = srem i16 [[L4]], [[B18]]
; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C10]], [[C11]]
; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[C12]], [[B15]]
; CHECK-NEXT: [[C6:%.*]] = xor i1 [[TMP6]], true
; CHECK-NEXT: [[B33:%.*]] = or i16 [[B29]], [[L4]]
; CHECK-NEXT: [[C3:%.*]] = and i1 [[C5]], [[C6]]
; CHECK-NEXT: [[C4:%.*]] = and i1 [[C3]], [[C11]]
; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[C11]], true
; CHECK-NEXT: [[C18:%.*]] = or i1 [[C10]], [[TMP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[C4]] to i64
; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP3]]
; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2
; CHECK-NEXT: store i16 [[B33]], ptr [[P:%.*]], align 2
; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1
; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8
; CHECK-NEXT: ret void
;
%A8 = alloca i16
%L7 = load i16, ptr %A8
%G21 = getelementptr i16, ptr %A8, i8 -1
%B11 = udiv i16 %L7, -1
3 changes: 1 addition & 2 deletions llvm/test/Transforms/InstCombine/apint-shift.ll
@@ -562,11 +562,10 @@ define i40 @test26(i40 %A) {

; OSS-Fuzz #9880
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9880
define i177 @ossfuzz_9880(i177 %X) {
define i177 @ossfuzz_9880(i177 %X, ptr %A) {
; CHECK-LABEL: @ossfuzz_9880(
; CHECK-NEXT: ret i177 0
;
%A = alloca i177
%L1 = load i177, ptr %A
%B = or i177 0, -1
%B5 = udiv i177 %L1, %B
14 changes: 6 additions & 8 deletions llvm/test/Transforms/InstCombine/call-cast-target.ll
@@ -110,19 +110,17 @@

declare i1 @fn5(ptr byval({ i32, i32 }) align 4 %r)

define i1 @test5() {
; CHECK-LABEL: define i1 @test5() {
; CHECK-NEXT: [[TMP1:%.*]] = alloca { i32, i32 }, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 4
define i1 @test5(ptr %ptr) {
; CHECK-LABEL: define i1 @test5(ptr %ptr) {
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i32 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @fn5(i32 [[TMP2]], i32 [[TMP4]])
; CHECK-NEXT: ret i1 [[TMP5]]
;
%1 = alloca { i32, i32 }, align 4
%2 = getelementptr inbounds { i32, i32 }, ptr %1, i32 0, i32 0
%2 = getelementptr inbounds { i32, i32 }, ptr %ptr, i32 0, i32 0
%3 = load i32, ptr %2, align 4
%4 = getelementptr inbounds { i32, i32 }, ptr %1, i32 0, i32 1
%4 = getelementptr inbounds { i32, i32 }, ptr %ptr, i32 0, i32 1
%5 = load i32, ptr %4, align 4
%6 = call i1 @fn5(i32 %3, i32 %5)
ret i1 %6