Skip to content

Commit f8862d4

Browse files
committed
EarlyCSE: create casts on type-mismatch
getOrCreateResult suffers from the deficiency that it doesn't attempt to create casts when types mismatch. Fix this deficiency, making EarlyCSE more powerful.
1 parent 7b624ea commit f8862d4

File tree

8 files changed

+590
-502
lines changed

8 files changed

+590
-502
lines changed

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/IR/Constants.h"
3232
#include "llvm/IR/Dominators.h"
3333
#include "llvm/IR/Function.h"
34+
#include "llvm/IR/IRBuilder.h"
3435
#include "llvm/IR/InstrTypes.h"
3536
#include "llvm/IR/Instruction.h"
3637
#include "llvm/IR/Instructions.h"
@@ -964,32 +965,45 @@ class EarlyCSE {
964965
bool overridingStores(const ParseMemoryInst &Earlier,
965966
const ParseMemoryInst &Later);
966967

967-
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
968-
// TODO: We could insert relevant casts on type mismatch here.
969-
if (auto *LI = dyn_cast<LoadInst>(Inst))
970-
return LI->getType() == ExpectedType ? LI : nullptr;
971-
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
972-
Value *V = SI->getValueOperand();
973-
return V->getType() == ExpectedType ? V : nullptr;
968+
Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
969+
if (!isa<IntrinsicInst, LoadInst, StoreInst>(Inst))
970+
llvm_unreachable("Instruction not supported");
971+
972+
// The load or the store's first operand.
973+
Value *V;
974+
if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
975+
if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
976+
switch (II->getIntrinsicID()) {
977+
case Intrinsic::masked_load:
978+
V = II;
979+
break;
980+
case Intrinsic::masked_store:
981+
V = II->getOperand(0);
982+
break;
983+
default:
984+
return nullptr;
985+
}
986+
else
987+
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
988+
} else {
989+
V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
974990
}
975-
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
976-
auto *II = cast<IntrinsicInst>(Inst);
977-
if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
978-
return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
979-
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
980-
}
981991

982-
Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
983-
Type *ExpectedType) const {
984-
// TODO: We could insert relevant casts on type mismatch here.
985-
switch (II->getIntrinsicID()) {
986-
case Intrinsic::masked_load:
987-
return II->getType() == ExpectedType ? II : nullptr;
988-
case Intrinsic::masked_store: {
989-
Value *V = II->getOperand(0);
990-
return V->getType() == ExpectedType ? V : nullptr;
991-
}
992-
}
992+
Type *ActualType = V->getType();
993+
BasicBlock *TheBB = Inst->getParent();
994+
995+
// First handle the case when no cast is required.
996+
if (ActualType == ExpectedType)
997+
return V;
998+
999+
// Try to create BitCast, SExt, or Trunc.
1000+
IRBuilder<> Builder(TheBB, std::next(Inst->getIterator()));
1001+
if (CastInst::castIsValid(Instruction::BitCast, V, ExpectedType))
1002+
return Builder.CreateBitCast(V, ExpectedType);
1003+
if (CastInst::castIsValid(Instruction::SExt, V, ExpectedType))
1004+
return Builder.CreateSExt(V, ExpectedType);
1005+
if (CastInst::castIsValid(Instruction::Trunc, V, ExpectedType))
1006+
return Builder.CreateTrunc(V, ExpectedType);
9931007
return nullptr;
9941008
}
9951009

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3683,7 +3683,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
36833683
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
36843684
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
36853685
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
3686-
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
3686+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD2]](s8)
36873687
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
36883688
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
36893689
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3720,7 +3720,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
37203720
; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
37213721
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
37223722
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
3723-
; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
3723+
; CHECK-NEXT: G_STORE [[SEXT]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
37243724
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
37253725
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
37263726
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 330 additions & 315 deletions
Large diffs are not rendered by default.

0 commit comments

Comments (0)