Skip to content

Commit c047193

Browse files
committed
EarlyCSE: create casts on type-mismatch
getOrCreateResult does not attempt to create casts when the actual and expected types mismatch; it simply returns nullptr. Fix this deficiency by inserting a bitcast whenever one is valid, making EarlyCSE more powerful.
1 parent ee29eb1 commit c047193

File tree

3 files changed

+48
-38
lines changed

3 files changed

+48
-38
lines changed

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/IR/Constants.h"
3232
#include "llvm/IR/Dominators.h"
3333
#include "llvm/IR/Function.h"
34+
#include "llvm/IR/IRBuilder.h"
3435
#include "llvm/IR/InstrTypes.h"
3536
#include "llvm/IR/Instruction.h"
3637
#include "llvm/IR/Instructions.h"
@@ -964,32 +965,35 @@ class EarlyCSE {
964965
bool overridingStores(const ParseMemoryInst &Earlier,
965966
const ParseMemoryInst &Later);
966967

967-
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
968-
// TODO: We could insert relevant casts on type mismatch here.
969-
if (auto *LI = dyn_cast<LoadInst>(Inst))
970-
return LI->getType() == ExpectedType ? LI : nullptr;
971-
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
972-
Value *V = SI->getValueOperand();
973-
return V->getType() == ExpectedType ? V : nullptr;
968+
Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
969+
// The load or the store's first operand.
970+
Value *V;
971+
if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
972+
switch (II->getIntrinsicID()) {
973+
case Intrinsic::masked_load:
974+
V = II;
975+
break;
976+
case Intrinsic::masked_store:
977+
V = II->getOperand(0);
978+
break;
979+
default:
980+
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
981+
}
982+
} else {
983+
V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
974984
}
975-
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
976-
auto *II = cast<IntrinsicInst>(Inst);
977-
if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
978-
return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
979-
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
980-
}
981985

982-
Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
983-
Type *ExpectedType) const {
984-
// TODO: We could insert relevant casts on type mismatch here.
985-
switch (II->getIntrinsicID()) {
986-
case Intrinsic::masked_load:
987-
return II->getType() == ExpectedType ? II : nullptr;
988-
case Intrinsic::masked_store: {
989-
Value *V = II->getOperand(0);
990-
return V->getType() == ExpectedType ? V : nullptr;
991-
}
992-
}
986+
Type *ActualType = V->getType();
987+
BasicBlock *TheBB = Inst->getParent();
988+
989+
// First handle the case when no cast is required.
990+
if (ActualType == ExpectedType)
991+
return V;
992+
993+
// If valid, create a bitcast.
994+
IRBuilder<> Builder(TheBB, std::next(Inst->getIterator()));
995+
if (CastInst::castIsValid(Instruction::BitCast, V, ExpectedType))
996+
return Builder.CreateBitCast(V, ExpectedType);
993997
return nullptr;
994998
}
995999

llvm/test/Transforms/EarlyCSE/invariant.start.ll

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -472,15 +472,22 @@ define void @test_dse_after_load(ptr %p, i1 %cnd) {
472472
; typed due to the use of a Value to represent the address. Note that other
473473
; passes will canonicalize away the bitcasts in this example.
474474
define i32 @test_false_negative_types(ptr %p) {
475-
; CHECK-LABEL: define {{[^@]+}}@test_false_negative_types
476-
; CHECK-SAME: (ptr [[P:%.*]]) {
477-
; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[P]])
478-
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
479-
; CHECK-NEXT: call void @clobber()
480-
; CHECK-NEXT: [[V2F:%.*]] = load float, ptr [[P]], align 4
481-
; CHECK-NEXT: [[V2:%.*]] = bitcast float [[V2F]] to i32
482-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[V1]], [[V2]]
483-
; CHECK-NEXT: ret i32 [[SUB]]
475+
; NO_ASSUME-LABEL: define {{[^@]+}}@test_false_negative_types
476+
; NO_ASSUME-SAME: (ptr [[P:%.*]]) {
477+
; NO_ASSUME-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[P]])
478+
; NO_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
479+
; NO_ASSUME-NEXT: [[TMP2:%.*]] = bitcast i32 [[V1]] to float
480+
; NO_ASSUME-NEXT: call void @clobber()
481+
; NO_ASSUME-NEXT: ret i32 0
482+
;
483+
; USE_ASSUME-LABEL: define {{[^@]+}}@test_false_negative_types
484+
; USE_ASSUME-SAME: (ptr [[P:%.*]]) {
485+
; USE_ASSUME-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[P]])
486+
; USE_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
487+
; USE_ASSUME-NEXT: [[TMP2:%.*]] = bitcast i32 [[V1]] to float
488+
; USE_ASSUME-NEXT: call void @clobber()
489+
; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 4), "nonnull"(ptr [[P]]), "align"(ptr [[P]], i64 4) ]
490+
; USE_ASSUME-NEXT: ret i32 0
484491
;
485492
call ptr @llvm.invariant.start.p0(i64 4, ptr %p)
486493
%v1 = load i32, ptr %p
@@ -571,13 +578,13 @@ define i32 @test_false_negative_scope(ptr %p) {
571578
define i32 @test_invariant_load_scope(ptr %p) {
572579
; NO_ASSUME-LABEL: define {{[^@]+}}@test_invariant_load_scope
573580
; NO_ASSUME-SAME: (ptr [[P:%.*]]) {
574-
; NO_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load !4
581+
; NO_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META4:![0-9]+]]
575582
; NO_ASSUME-NEXT: call void @clobber()
576583
; NO_ASSUME-NEXT: ret i32 0
577584
;
578585
; USE_ASSUME-LABEL: define {{[^@]+}}@test_invariant_load_scope
579586
; USE_ASSUME-SAME: (ptr [[P:%.*]]) {
580-
; USE_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load !4
587+
; USE_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META4:![0-9]+]]
581588
; USE_ASSUME-NEXT: call void @clobber()
582589
; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 4), "nonnull"(ptr [[P]]), "align"(ptr [[P]], i64 4) ]
583590
; USE_ASSUME-NEXT: ret i32 0
@@ -589,7 +596,6 @@ define i32 @test_invariant_load_scope(ptr %p) {
589596
ret i32 %sub
590597
}
591598

592-
; USE_ASSUME: declare void @llvm.assume(i1 noundef)
593599

594600
!0 = !{!1, !1, i64 0}
595601
!1 = !{!"float", !2, i64 0}

llvm/test/Transforms/EarlyCSE/opaque-ptr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ define i32 @different_types_store(ptr %p, i32 %a) {
5151
define i32 @different_elt_types_vector_load(ptr %p, <4 x i1> %c) {
5252
; CHECK-LABEL: @different_elt_types_vector_load(
5353
; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[P:%.*]], i32 4, <4 x i1> [[C:%.*]], <4 x i32> poison)
54-
; CHECK-NEXT: [[V2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> [[C]], <4 x float> poison)
54+
; CHECK-NEXT: [[V2:%.*]] = bitcast <4 x i32> [[V1]] to <4 x float>
5555
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[V1]], i32 0
5656
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x float> [[V2]], i32 0
5757
; CHECK-NEXT: [[E2I:%.*]] = fptosi float [[E2]] to i32
@@ -70,7 +70,7 @@ define i32 @different_elt_types_vector_load(ptr %p, <4 x i1> %c) {
7070
define float @different_elt_types_vector_store_load(ptr %p, <4 x i32> %v1, <4 x i1> %c) {
7171
; CHECK-LABEL: @different_elt_types_vector_store_load(
7272
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V1:%.*]], ptr [[P:%.*]], i32 4, <4 x i1> [[C:%.*]])
73-
; CHECK-NEXT: [[V2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> [[C]], <4 x float> poison)
73+
; CHECK-NEXT: [[V2:%.*]] = bitcast <4 x i32> [[V1]] to <4 x float>
7474
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x float> [[V2]], i32 0
7575
; CHECK-NEXT: ret float [[E2]]
7676
;

0 commit comments

Comments
 (0)