Skip to content

Commit 6d5ef39

Browse files
krystian-andrzejewski authored and sys_zuul committed
Improve IGCConstProp pass to support constant folding in rounding-mode-specific GenISA intrinsics.
Add a new GenISA intrinsic, GenISA_ftof_rte, for floating-point conversion with the round-to-nearest-even rounding mode. It is needed when the default shader rounding mode is not set to round-to-nearest. Change-Id: I2beef976c62235f5f19579985baee4b9daca6046
1 parent d64c77b commit 6d5ef39

File tree

5 files changed

+144
-13
lines changed

5 files changed

+144
-13
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7825,6 +7825,7 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
78257825
case GenISAIntrinsic::GenISA_uitof_rtn:
78267826
case GenISAIntrinsic::GenISA_uitof_rtp:
78277827
case GenISAIntrinsic::GenISA_uitof_rtz:
7828+
case GenISAIntrinsic::GenISA_ftof_rte:
78287829
case GenISAIntrinsic::GenISA_ftof_rtn:
78297830
case GenISAIntrinsic::GenISA_ftof_rtp:
78307831
case GenISAIntrinsic::GenISA_ftof_rtz:
@@ -13913,6 +13914,9 @@ ERoundingMode EmitPass::GetRoundingMode_FP(Instruction* inst)
1391313914
case GenISAIntrinsic::GenISA_uitof_rtp:
1391413915
RM = ERoundingMode::ROUND_TO_POSITIVE;
1391513916
break;
13917+
case GenISAIntrinsic::GenISA_ftof_rte:
13918+
RM = ERoundingMode::ROUND_TO_NEAREST_EVEN;
13919+
break;
1391613920
default:
1391713921
break;
1391813922
}

IGC/Compiler/CISACodeGen/opCode.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ DECLARE_OPCODE(GenISA_PullCentroidBarys, GenISAIntrinsic, llvm_pul
6262
DECLARE_OPCODE(sqrt, Intrinsic, llvm_sqrt, true, true, true, false, true, false, false)
6363
DECLARE_OPCODE(GenISA_rsq, GenISAIntrinsic, llvm_rsq, true, true, true, false, true, false, false)
6464
DECLARE_OPCODE(FMul, Instruction, llvm_fmul, true, true, true, true, false, false, false)
65+
DECLARE_OPCODE(GenISA_mul_rtz, GenISAIntrinsic, llvm_fmul_rtz, true, true, true, true, false, false, false)
6566
DECLARE_OPCODE(FAdd, Instruction, llvm_fadd, true, true, true, true, false, false, false)
67+
DECLARE_OPCODE(GenISA_add_rtz, GenISAIntrinsic, llvm_fadd_rtz, true, true, true, true, false, false, false)
6668
DECLARE_OPCODE(FSub, Instruction, llvm_fsub, true, true, true, true, false, false, false)
6769
DECLARE_OPCODE(FDiv, Instruction, llvm_fdiv, true, true, true, false, false, false, false)
6870
DECLARE_OPCODE(FRem, Instruction, llvm_frem, false, false, false, false, false, false, false)
@@ -117,6 +119,10 @@ DECLARE_OPCODE(SExt, Instruction, llvm_sext, true, true, false, false, false, fa
117119
DECLARE_OPCODE(ZExt, Instruction, llvm_zext, false, true, false, false, false, false, false)
118120
DECLARE_OPCODE(Trunc, Instruction, llvm_trunc, true, false, false, false, false, false, false)
119121
DECLARE_OPCODE(FPTrunc, Instruction, llvm_fptrunc, true, false, false, false, false, false, false)
122+
DECLARE_OPCODE(GenISA_ftof_rte, GenISAIntrinsic, llvm_fptrunc_rte, true, false, false, false, false, false, false)
123+
DECLARE_OPCODE(GenISA_ftof_rtz, GenISAIntrinsic, llvm_fptrunc_rtz, true, false, false, false, false, false, false)
124+
DECLARE_OPCODE(GenISA_ftof_rtp, GenISAIntrinsic, llvm_fptrunc_rtp, true, false, false, false, false, false, false)
125+
DECLARE_OPCODE(GenISA_ftof_rtn, GenISAIntrinsic, llvm_fptrunc_rtn, true, false, false, false, false, false, false)
120126
DECLARE_OPCODE(BitCast, Instruction, llvm_bitcast, false, false, false, false, false, false, false)
121127
DECLARE_OPCODE(PtrToInt, Instruction, llvm_ptrtoint, false, false, false, false, false, false, false)
122128
DECLARE_OPCODE(IntToPtr, Instruction, llvm_inttoptr, false, false, false, false, false, false, false)

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 130 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2308,6 +2308,84 @@ void GenSpecificPattern::visitFNeg(llvm::UnaryOperator& I)
23082308
}
23092309
#endif
23102310

2311+
// Folds the floating-point addition C0 + C1 using an explicit rounding mode.
//
// C0, C1       - operands of the addition.
// roundingMode - rounding mode applied to the result.
//
// Returns the folded constant. When either operand is undef, the result of
// llvm::ConstantFolder::CreateFAdd is returned instead. Returns nullptr when an operand
// is not a ConstantFP or when APFloat reports an invalid operation, so callers must treat
// nullptr as "not folded".
llvm::Constant* IGC::IGCConstantFolder::CreateFAdd(llvm::Constant* C0, llvm::Constant* C1, llvm::APFloatBase::roundingMode roundingMode) const
{
    if (llvm::isa<llvm::UndefValue>(C0) || llvm::isa<llvm::UndefValue>(C1))
    {
        return llvm::ConstantFolder::CreateFAdd(C0, C1);
    }
    // The operands are typed as llvm::Constant, so they may be something other than
    // ConstantFP (e.g. a constant expression); bail out instead of asserting in cast<>.
    llvm::ConstantFP* CFP0 = llvm::dyn_cast<ConstantFP>(C0);
    llvm::ConstantFP* CFP1 = llvm::dyn_cast<ConstantFP>(C1);
    if (CFP0 == nullptr || CFP1 == nullptr)
    {
        return nullptr;
    }
    // APFloat::add works in place, so accumulate into a copy of the first operand.
    APFloat result = CFP0->getValueAPF();
    APFloat::opStatus status = result.add(CFP1->getValueAPF(), roundingMode);
    // opStatus is a bit mask; test the invalid-operation bit rather than comparing for equality.
    if ((status & APFloat::opInvalidOp) != 0)
    {
        return nullptr;
    }
    return llvm::ConstantFP::get(C0->getContext(), result);
}
2331+
2332+
// Folds the floating-point multiplication C0 * C1 using an explicit rounding mode.
//
// C0, C1       - operands of the multiplication.
// roundingMode - rounding mode applied to the result.
//
// Returns the folded constant. When either operand is undef, the result of
// llvm::ConstantFolder::CreateFMul is returned instead. Returns nullptr when an operand
// is not a ConstantFP or when APFloat reports an invalid operation, so callers must treat
// nullptr as "not folded".
llvm::Constant* IGC::IGCConstantFolder::CreateFMul(llvm::Constant* C0, llvm::Constant* C1, llvm::APFloatBase::roundingMode roundingMode) const
{
    if (llvm::isa<llvm::UndefValue>(C0) || llvm::isa<llvm::UndefValue>(C1))
    {
        return llvm::ConstantFolder::CreateFMul(C0, C1);
    }
    // The operands are typed as llvm::Constant, so they may be something other than
    // ConstantFP (e.g. a constant expression); bail out instead of asserting in cast<>.
    llvm::ConstantFP* CFP0 = llvm::dyn_cast<ConstantFP>(C0);
    llvm::ConstantFP* CFP1 = llvm::dyn_cast<ConstantFP>(C1);
    if (CFP0 == nullptr || CFP1 == nullptr)
    {
        return nullptr;
    }
    // APFloat::multiply works in place, so accumulate into a copy of the first operand.
    APFloat result = CFP0->getValueAPF();
    APFloat::opStatus status = result.multiply(CFP1->getValueAPF(), roundingMode);
    // opStatus is a bit mask; test the invalid-operation bit rather than comparing for equality.
    if ((status & APFloat::opInvalidOp) != 0)
    {
        return nullptr;
    }
    return llvm::ConstantFP::get(C0->getContext(), result);
}
2352+
2353+
// Folds a floating-point conversion of C0 to dstType using an explicit rounding mode.
//
// C0           - value to convert.
// dstType      - destination type; only half, float and double are folded.
// roundingMode - rounding mode applied when the value is not exactly representable.
//
// Returns the converted constant. When C0 is undef, the result of
// llvm::ConstantFolder::CreateFPCast is returned instead. Returns nullptr when C0 is not a
// ConstantFP, when dstType is not one of the supported types, or when APFloat reports an
// invalid operation, so callers must treat nullptr as "not folded".
llvm::Constant* IGC::IGCConstantFolder::CreateFPTrunc(llvm::Constant* C0, llvm::Type* dstType, llvm::APFloatBase::roundingMode roundingMode) const
{
    if (llvm::isa<llvm::UndefValue>(C0))
    {
        return llvm::ConstantFolder::CreateFPCast(C0, dstType);
    }
    // Do not assume ConstantFP: the parameter accepts any llvm::Constant.
    llvm::ConstantFP* CFP0 = llvm::dyn_cast<ConstantFP>(C0);
    if (CFP0 == nullptr)
    {
        return nullptr;
    }
    // Pick the semantics explicitly; an unsupported destination type must not silently
    // fall back to double, because the folded constant would then have the wrong type.
    const fltSemantics* outputSemantics = nullptr;
    if (dstType->isHalfTy())
    {
        outputSemantics = &APFloatBase::IEEEhalf();
    }
    else if (dstType->isFloatTy())
    {
        outputSemantics = &APFloatBase::IEEEsingle();
    }
    else if (dstType->isDoubleTy())
    {
        outputSemantics = &APFloatBase::IEEEdouble();
    }
    else
    {
        return nullptr;
    }
    // APFloat::convert works in place on a copy of the source value.
    APFloat APF = CFP0->getValueAPF();
    bool losesInfo = false;
    APFloat::opStatus status = APF.convert(*outputSemantics, roundingMode, &losesInfo);
    // opStatus is a bit mask; test the invalid-operation bit rather than comparing for equality.
    if ((status & APFloat::opInvalidOp) != 0)
    {
        return nullptr;
    }
    return llvm::ConstantFP::get(C0->getContext(), APF);
}
2374+
2375+
// Produces the canonical form of the floating-point constant C0.
//
// C0           - constant to canonicalize; an undef value is returned unchanged.
// flushDenorms - when true, a denormal input is replaced by a zero of the same sign.
//
// Returns C0 itself for undef, otherwise a ConstantFP holding the (possibly flushed) value.
llvm::Constant* IGC::IGCConstantFolder::CreateCanonicalize(llvm::Constant* C0, bool flushDenorms /*= true*/) const
{
    if (!llvm::isa<llvm::UndefValue>(C0))
    {
        APFloat value = llvm::cast<ConstantFP>(C0)->getValueAPF();
        const bool mustFlush = flushDenorms && value.isDenormal();
        if (mustFlush)
        {
            // Keep the sign of the denormal when flushing it to zero.
            value = APFloat::getZero(value.getSemantics(), value.isNegative());
        }
        return ConstantFP::get(C0->getContext(), value);
    }
    return C0;
}
2388+
23112389
// Register pass to igc-opt
23122390
#define PASS_FLAG3 "igc-const-prop"
23132391
#define PASS_DESCRIPTION3 "Custom Const-prop Pass"
@@ -2514,17 +2592,6 @@ Constant* IGCConstProp::replaceShaderConstant(LoadInst* inst)
25142592
return nullptr;
25152593
}
25162594

2517-
llvm::Constant* IGC::IGCConstantFolder::CreateCanonicalize(llvm::Constant* C0, bool flushDenorms /*= true*/) const
2518-
{
2519-
assert(llvm::isa<ConstantFP>(C0));
2520-
auto APF = llvm::cast<ConstantFP>(C0)->getValueAPF();
2521-
if (flushDenorms && APF.isDenormal())
2522-
{
2523-
APF = APFloat::getZero(APF.getSemantics(), APF.isNegative());
2524-
}
2525-
return ConstantFP::get(C0->getContext(), APF);
2526-
}
2527-
25282595
Constant* IGCConstProp::ConstantFoldCallInstruction(CallInst* inst)
25292596
{
25302597
Constant* C = nullptr;
@@ -2534,6 +2601,7 @@ Constant* IGCConstProp::ConstantFoldCallInstruction(CallInst* inst)
25342601
// used for GenISA_sqrt, GenISA_rsq and GenISA_ROUNDNE
25352602
ConstantFP* C0 = dyn_cast<ConstantFP>(inst->getOperand(0));
25362603
EOPCODE igcop = GetOpCode(inst);
2604+
IGCConstantFolder folder;
25372605

25382606
// special case of gen-intrinsic
25392607
switch (igcop)
@@ -2628,18 +2696,67 @@ Constant* IGCConstProp::ConstantFoldCallInstruction(CallInst* inst)
26282696
}
26292697
}
26302698
break;
2699+
case llvm_fptrunc_rte:
2700+
{
2701+
if (C0)
2702+
{
2703+
C = folder.CreateFPTrunc(C0, inst->getType(), llvm::APFloatBase::rmNearestTiesToEven);
2704+
}
2705+
}
2706+
break;
2707+
case llvm_fptrunc_rtz:
2708+
{
2709+
if (C0)
2710+
{
2711+
C = folder.CreateFPTrunc(C0, inst->getType(), llvm::APFloatBase::rmTowardZero);
2712+
}
2713+
}
2714+
break;
2715+
case llvm_fptrunc_rtp:
2716+
{
2717+
if (C0)
2718+
{
2719+
C = folder.CreateFPTrunc(C0, inst->getType(), llvm::APFloatBase::rmTowardPositive);
2720+
}
2721+
}
2722+
break;
2723+
case llvm_fptrunc_rtn:
2724+
{
2725+
if (C0)
2726+
{
2727+
C = folder.CreateFPTrunc(C0, inst->getType(), llvm::APFloatBase::rmTowardNegative);
2728+
}
2729+
}
2730+
break;
2731+
case llvm_fadd_rtz:
2732+
{
2733+
Constant* C1 = dyn_cast<Constant>(inst->getOperand(1));
2734+
if (C0 && C1)
2735+
{
2736+
C = folder.CreateFAdd(C0, C1, llvm::APFloatBase::rmTowardZero);
2737+
}
2738+
}
2739+
break;
2740+
case llvm_fmul_rtz:
2741+
{
2742+
Constant* C1 = dyn_cast<Constant>(inst->getOperand(1));
2743+
if (C0 && C1)
2744+
{
2745+
C = folder.CreateFMul(C0, C1, llvm::APFloatBase::rmTowardZero);
2746+
}
2747+
}
2748+
break;
26312749
case llvm_canonicalize:
26322750
{
26332751
// If the instruction should be emitted anyway, then remove the condition.
26342752
// Please, be aware of the fact that clients can understand the term canonical FP value in other way.
26352753
if (C0)
26362754
{
2637-
IGCConstantFolder constantFolder;
26382755
CodeGenContext* pCodeGenContext = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
26392756
bool flushVal = pCodeGenContext->m_floatDenormMode16 == ::IGC::FLOAT_DENORM_FLUSH_TO_ZERO && inst->getType()->isHalfTy();
26402757
flushVal = flushVal || (pCodeGenContext->m_floatDenormMode32 == ::IGC::FLOAT_DENORM_FLUSH_TO_ZERO && inst->getType()->isFloatTy());
26412758
flushVal = flushVal || (pCodeGenContext->m_floatDenormMode64 == ::IGC::FLOAT_DENORM_FLUSH_TO_ZERO && inst->getType()->isDoubleTy());
2642-
C = constantFolder.CreateCanonicalize(C0, flushVal);
2759+
C = folder.CreateCanonicalize(C0, flushVal);
26432760
}
26442761
}
26452762
break;

IGC/Compiler/CustomSafeOptPass.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ namespace IGC
212212
{}
213213

214214
llvm::Constant* CreateCanonicalize(llvm::Constant* C0, bool flushDenorms = true) const;
215+
llvm::Constant* CreateFAdd(llvm::Constant* C0, llvm::Constant* C1, llvm::APFloatBase::roundingMode roundingMode) const;
216+
llvm::Constant* CreateFMul(llvm::Constant* C0, llvm::Constant* C1, llvm::APFloatBase::roundingMode roundingMode) const;
217+
llvm::Constant* CreateFPTrunc(llvm::Constant* C0, llvm::Type* dstType, llvm::APFloatBase::roundingMode roundingMode) const;
215218
};
216219

217220
class IGCConstProp : public llvm::FunctionPass

IGC/GenISAIntrinsics/Intrinsic_definitions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
"GenISA_ftoui_rte": ["anyint",["anyfloat"],"NoMem"],
6767
"GenISA_ftoui_rtp": ["anyint",["anyfloat"],"NoMem"],
6868
"GenISA_ftoui_rtn": ["anyint",["anyfloat"],"NoMem"],
69+
"GenISA_ftof_rte": ["anyfloat",["anyfloat"],"NoMem"],
6970
"GenISA_ftof_rtn": ["anyfloat",["anyfloat"],"NoMem"],
7071
"GenISA_ftof_rtp": ["anyfloat",["anyfloat"],"NoMem"],
7172
"GenISA_ftof_rtz": ["anyfloat",["anyfloat"],"NoMem"],

0 commit comments

Comments
 (0)