13 changes: 13 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -62119,3 +62119,16 @@ Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
return TargetLowering::getPrefLoopAlignment();
}

bool X86TargetLowering::isProfitableToHoist(Instruction *I) const {
// Casts taking a constant expression (generally derived from a global
// variable address) as an operand are not profitable to CSE because they
// appear as subexpressions in the instruction sequence generated by the
// LowerTypeTests pass, which is expected to pattern-match to a rotate
// instruction. These casts are expected to be folded into the
// instruction's immediate operand during code generation, so there is no
// benefit to hoisting them anyway.
if (isa<CastInst>(I) && isa<ConstantExpr>(I->getOperand(0)))
return false;
return true;
}
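
For comparison, other targets can use the same TargetLowering hook to keep specific instructions in place. A minimal hypothetical sketch of such an override (MyTargetLowering and the fdiv heuristic are illustrative only, not part of this patch):

// Hypothetical override for some other target: keep expensive divides in
// their original block. MyTargetLowering is an illustrative name.
bool MyTargetLowering::isProfitableToHoist(Instruction *I) const {
  if (I->getOpcode() == Instruction::FDiv)
    return false; // don't let GVN/PRE move the fdiv to another block
  return TargetLowering::isProfitableToHoist(I);
}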
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1669,6 +1669,8 @@ namespace llvm {

Align getPrefLoopAlignment(MachineLoop *ML) const override;

bool isProfitableToHoist(Instruction *I) const override;

EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
if (VT == MVT::f80)
return EVT::getIntegerVT(Context, 96);
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2724,7 +2724,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// If this replacement would effectively hoist the instruction and that is
// known to be unprofitable, don't do it. Remember the instruction so that
// it can still be used by other instructions in the same block.
- if (auto *ReplI = dyn_cast<Instruction>(I)) {
+ if (auto *ReplI = dyn_cast<Instruction>(Repl)) {
if (ReplI->getParent() != I->getParent() && !TTI->isProfitableToHoist(I)) {
LeaderTable.insert(Num, I, I->getParent());
return false;
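
For context, the TTI->isProfitableToHoist query used here reaches the X86 override added above through the default BasicTTIImpl wiring, which simply forwards to the TargetLowering hook. Paraphrased sketch, not part of this diff:

// Paraphrased from BasicTTIImplBase: the TargetTransformInfo query
// forwards straight to the target's lowering hook.
bool isProfitableToHoist(Instruction *I) {
  return getTLI()->isProfitableToHoist(I);
}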
@@ -2891,7 +2891,7 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
- isa<DbgInfoIntrinsic>(CurInst))
+ isa<DbgInfoIntrinsic>(CurInst) || !TTI->isProfitableToHoist(CurInst))
return false;

// Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
179 changes: 179 additions & 0 deletions llvm/test/Other/inhibit-zext-constant-hoist.ll
@@ -0,0 +1,179 @@
; REQUIRES: x86-registered-target
; Make sure that optimizations do not hoist zext(const).

; This IR is normally generated by LowerTypeTests during ThinLTO importing
; so it will go through the ThinLTO pass pipeline.
; RUN: opt -passes='thinlto<O0>' -S < %s | FileCheck %s
; RUN: opt -passes='thinlto<O1>' -S < %s | FileCheck %s
; RUN: opt -passes='thinlto<O2>' -S < %s | FileCheck %s
; RUN: opt -passes='thinlto<O3>' -S < %s | FileCheck %s

; Also check the regular pipelines for completeness.
; RUN: opt -O0 -S < %s | FileCheck %s
; RUN: opt -O1 -S < %s | FileCheck %s
; RUN: opt -O2 -S < %s | FileCheck %s
; RUN: opt -O3 -S < %s | FileCheck %s
; RUN: opt -Os -S < %s | FileCheck %s
; RUN: opt -Oz -S < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1

; Check that we still have two pairs of zexts (non-dominating case).

; CHECK: define void @f1
define void @f1(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
br i1 %0, label %4, label %17

4:
%5 = load ptr, ptr %1, align 8
%6 = ptrtoint ptr %5 to i64
%7 = sub i64 %6, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%8 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%9 = lshr i64 %7, %8
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%10 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%11 = shl i64 %7, %10
%12 = or i64 %9, %11
%13 = icmp ule i64 %12, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %13, label %15, label %14

14:
call void @llvm.ubsantrap(i8 2)
unreachable

15:
%16 = load ptr, ptr %5, align 8
call void %16(ptr noundef nonnull align 8 dereferenceable(8) %1)
br label %30

17:
%18 = load ptr, ptr %2, align 8
%19 = ptrtoint ptr %18 to i64
%20 = sub i64 %19, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%21 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%22 = lshr i64 %20, %21
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%23 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%24 = shl i64 %20, %23
%25 = or i64 %22, %24
%26 = icmp ule i64 %25, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %26, label %28, label %27

27:
call void @llvm.ubsantrap(i8 2)
unreachable

28:
%29 = load ptr, ptr %18, align 8
call void %29(ptr noundef nonnull align 8 dereferenceable(8) %2)
br label %30

30:
ret void
}

; Check that we still have two pairs of zexts (dominating case).

; CHECK: define void @f2
define void @f2(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
%4 = load ptr, ptr %1, align 8
%5 = ptrtoint ptr %4 to i64
%6 = sub i64 %5, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%7 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%8 = lshr i64 %6, %7
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%9 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%10 = shl i64 %6, %9
%11 = or i64 %8, %10
%12 = icmp ule i64 %11, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %12, label %14, label %13

13: ; preds = %3
call void @llvm.ubsantrap(i8 2)
unreachable

14: ; preds = %3
%15 = load ptr, ptr %4, align 8
call void %15(ptr noundef nonnull align 8 dereferenceable(8) %1)
br i1 %0, label %16, label %29

16: ; preds = %14
%17 = load ptr, ptr %2, align 8
%18 = ptrtoint ptr %17 to i64
%19 = sub i64 %18, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%20 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%21 = lshr i64 %19, %20
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%22 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%23 = shl i64 %19, %22
%24 = or i64 %21, %23
%25 = icmp ule i64 %24, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %25, label %27, label %26

26: ; preds = %16
call void @llvm.ubsantrap(i8 2)
unreachable

27: ; preds = %16
%28 = load ptr, ptr %17, align 8
call void %28(ptr noundef nonnull align 8 dereferenceable(8) %2)
br label %29

29: ; preds = %27, %14
ret void
}

; Check that the zexts aren't moved to the preheader (or anywhere else)
; and stay in the same basic block.

; CHECK: define void @f3
define void @f3(ptr noundef readonly captures(address) %0, ptr noundef readnone captures(address) %1) {
%3 = icmp eq ptr %0, %1
br i1 %3, label %21, label %4

4:
; CHECK: = phi
%5 = phi ptr [ %19, %17 ], [ %0, %2 ]
%6 = load ptr, ptr %5, align 8
%7 = load ptr, ptr %6, align 8
%8 = ptrtoint ptr %7 to i64
%9 = sub i64 %8, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%10 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%11 = lshr i64 %9, %10
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%12 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%13 = shl i64 %9, %12
%14 = or i64 %11, %13
%15 = icmp ule i64 %14, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %15, label %17, label %16

16:
call void @llvm.ubsantrap(i8 2)
unreachable

17:
%18 = load ptr, ptr %7, align 8
call void %18(ptr noundef nonnull align 8 dereferenceable(8) %6)
%19 = getelementptr inbounds nuw i8, ptr %5, i64 8
%20 = icmp eq ptr %19, %1
br i1 %20, label %21, label %4

21:
ret void
}

declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.ubsantrap(i8 immarg)

!0 = !{i64 0, i64 256}
!1 = !{i64 0, i64 128}