Skip to content

Commit fcb5184

Browse files
committed
EarlyCSE: create casts on type-mismatch
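getOrCreateResult currently gives up and returns nullptr whenever the type of the available value differs from the expected type. Teach it to insert a cast instead (a BitCast, SExt, or Trunc, whichever CastInst::castIsValid accepts) immediately after the defining instruction, and to return nullptr only when none of these casts is valid. This makes EarlyCSE more powerful.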
1 parent ee29eb1 commit fcb5184

File tree

7 files changed

+416
-393
lines changed

7 files changed

+416
-393
lines changed

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/IR/Constants.h"
3232
#include "llvm/IR/Dominators.h"
3333
#include "llvm/IR/Function.h"
34+
#include "llvm/IR/IRBuilder.h"
3435
#include "llvm/IR/InstrTypes.h"
3536
#include "llvm/IR/Instruction.h"
3637
#include "llvm/IR/Instructions.h"
@@ -964,32 +965,39 @@ class EarlyCSE {
964965
bool overridingStores(const ParseMemoryInst &Earlier,
965966
const ParseMemoryInst &Later);
966967

967-
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
968-
// TODO: We could insert relevant casts on type mismatch here.
969-
if (auto *LI = dyn_cast<LoadInst>(Inst))
970-
return LI->getType() == ExpectedType ? LI : nullptr;
971-
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
972-
Value *V = SI->getValueOperand();
973-
return V->getType() == ExpectedType ? V : nullptr;
968+
Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
969+
// The value produced by the load, or the value operand of the store.
970+
Value *V;
971+
if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
972+
switch (II->getIntrinsicID()) {
973+
case Intrinsic::masked_load:
974+
V = II;
975+
break;
976+
case Intrinsic::masked_store:
977+
V = II->getOperand(0);
978+
break;
979+
default:
980+
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
981+
}
982+
} else {
983+
V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
974984
}
975-
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
976-
auto *II = cast<IntrinsicInst>(Inst);
977-
if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
978-
return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
979-
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
980-
}
981985

982-
Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
983-
Type *ExpectedType) const {
984-
// TODO: We could insert relevant casts on type mismatch here.
985-
switch (II->getIntrinsicID()) {
986-
case Intrinsic::masked_load:
987-
return II->getType() == ExpectedType ? II : nullptr;
988-
case Intrinsic::masked_store: {
989-
Value *V = II->getOperand(0);
990-
return V->getType() == ExpectedType ? V : nullptr;
991-
}
992-
}
986+
Type *ActualType = V->getType();
987+
BasicBlock *TheBB = Inst->getParent();
988+
989+
// First handle the case when no cast is required.
990+
if (ActualType == ExpectedType)
991+
return V;
992+
993+
// Try to create BitCast, SExt, or Trunc.
994+
IRBuilder<> Builder(TheBB, std::next(Inst->getIterator()));
995+
if (CastInst::castIsValid(Instruction::BitCast, V, ExpectedType))
996+
return Builder.CreateBitCast(V, ExpectedType);
997+
if (CastInst::castIsValid(Instruction::SExt, V, ExpectedType))
998+
return Builder.CreateSExt(V, ExpectedType);
999+
if (CastInst::castIsValid(Instruction::Trunc, V, ExpectedType))
1000+
return Builder.CreateTrunc(V, ExpectedType);
9931001
return nullptr;
9941002
}
9951003

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3684,7 +3684,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
36843684
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
36853685
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
36863686
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
3687-
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
3687+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD2]](s8)
36883688
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
36893689
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
36903690
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3721,7 +3721,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
37213721
; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
37223722
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
37233723
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
3724-
; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
3724+
; CHECK-NEXT: G_STORE [[SEXT]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
37253725
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
37263726
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
37273727
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)

0 commit comments

Comments
 (0)