@@ -23,6 +23,7 @@ SPDX-License-Identifier: MIT
2323#include " llvm/Linker/Linker.h"
2424#include " llvm/Support/SourceMgr.h"
2525#include " llvm/IRReader/IRReader.h"
26+ #include " llvm/Transforms/Utils/BasicBlockUtils.h"
2627#include " common/LLVMWarningsPop.hpp"
2728#include " AdaptorCommon/ImplicitArgs.hpp"
2829#include " AdaptorCommon/AddImplicitArgs.hpp"
@@ -596,6 +597,7 @@ inline bool isPrecompiledEmulationFunction(Function* func)
596597 func->getName ().contains (" precompiled_u32divrem" ) ||
597598 func->getName ().contains (" precompiled_s32divrem_sp" ) ||
598599 func->getName ().contains (" precompiled_u32divrem_sp" ) ||
600+ func->getName ().contains (" precompiled_convert_f64_to_f16" ) ||
599601 func->getName ().contains (" __igcbuiltin_sp_div" ) ||
600602 func->getName ().contains (" __igcbuiltin_dp_div_nomadm_ieee" );
601603}
@@ -765,44 +767,29 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
765767 {
766768 createIntrinsicCall (CI, GenISAIntrinsic::GenISA_fma_rtn);
767769 }
768- }
769- }
770- }
771- }
772- }
773- }
774-
775- // post-process the Int32 precompiled emulation function for div/rem
776- if (isI32DivRem () || isI32DivRemSP () || isSPDiv ())
777- {
778- for (auto FI = M.begin (), FE = M.end (); FI != FE; )
779- {
780- llvm::Function* func = &(*FI);
781- ++FI;
782- if (isPrecompiledEmulationFunction (func))
783- {
784- for (auto BBI = func->begin (), BBE = func->end (); BBI != BBE; )
785- {
786- llvm::BasicBlock* BB = &(*BBI);
787- ++BBI;
788- for (auto I = BB->begin (), IE = BB->end (); I != IE; I++)
789- {
790- if (CallInst * CI = dyn_cast<CallInst>(I))
791- {
792- if (Function* calledFunc = CI->getCalledFunction ())
770+ else if (calledFunc->getName ().startswith (" GenISA_add_rte" ))
771+ {
772+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rte);
773+ }
774+ else if (calledFunc->getName ().startswith (" GenISA_add_rtz" ))
775+ {
776+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtz);
777+ }
778+ else if (calledFunc->getName ().startswith (" GenISA_add_rtn" ))
779+ {
780+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtn);
781+ }
782+ else if (calledFunc->getName ().startswith (" GenISA_add_rtp" ))
783+ {
784+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtp);
785+ }
786+ else if (calledFunc->getName ().startswith (" GenISA_mul_rtz" ))
787+ {
788+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_mul_rtz);
789+ }
790+ else if (calledFunc->getName ().startswith (" GenISA_uitof_rtz" ))
793791 {
794- if (calledFunc->getName ().startswith (" GenISA_mul_rtz" ))
795- {
796- createIntrinsicCall (CI, GenISAIntrinsic::GenISA_mul_rtz);
797- }
798- else if (calledFunc->getName ().startswith (" GenISA_add_rtz" ))
799- {
800- createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtz);
801- }
802- else if (calledFunc->getName ().startswith (" GenISA_uitof_rtz" ))
803- {
804- createIntrinsicCall (CI, GenISAIntrinsic::GenISA_uitof_rtz);
805- }
792+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_uitof_rtz);
806793 }
807794 }
808795 }
@@ -1525,7 +1512,7 @@ void PreCompiledFuncImport::getInt64DivideEmuType(EmulatedFunctions function, un
15251512void PreCompiledFuncImport::visitFPTruncInst (llvm::FPTruncInst& inst)
15261513{
15271514 m_pCtx->metrics .StatBeginEmuFunc (&inst);
1528- if ((isFP64toFP16 () || isDPEmu () || isDPConvEmu ()) &&
1515+ if ((isRTEFP64toFP16 () || isDPEmu () || isDPConvEmu ()) &&
15291516 inst.getDestTy ()->isHalfTy () && inst.getSrcTy ()->isDoubleTy ())
15301517 {
15311518 if (inst.getDestTy ()->isVectorTy ())
@@ -1534,7 +1521,16 @@ void PreCompiledFuncImport::visitFPTruncInst(llvm::FPTruncInst& inst)
15341521 return ;
15351522 }
15361523
1537- const StringRef funcName = " __precompiled_convert_f64_to_f16" ;
1524+ ERoundingMode RM = static_cast <ERoundingMode>(m_pCtx->getModuleMetaData ()->compOpt .FloatRoundingMode );
1525+ if (isRTEFP64toFP16 () && !isDPEmu () && !isDPConvEmu () && RM != ERoundingMode::ROUND_TO_NEAREST_EVEN)
1526+ {
1527+ // It is safe to use a sequence of conversions for RTZ, RTN and RTP (fp64 -> fp32 -> fp16)
1528+ return ;
1529+ }
1530+ const StringRef funcName = RM == ERoundingMode::ROUND_TO_ZERO ? " __precompiled_convert_f64_to_f16_rtz" :
1531+ RM == ERoundingMode::ROUND_TO_NEGATIVE ? " __precompiled_convert_f64_to_f16_rtn" :
1532+ RM == ERoundingMode::ROUND_TO_POSITIVE ? " __precompiled_convert_f64_to_f16_rtp" :
1533+ " __precompiled_convert_f64_to_f16_rte" ;
15381534 Function* func = m_pModule->getFunction (funcName);
15391535
15401536 // Try to look up the function in the module's symbol
@@ -1553,12 +1549,9 @@ void PreCompiledFuncImport::visitFPTruncInst(llvm::FPTruncInst& inst)
15531549 m_pModule);
15541550 }
15551551
1556- CallInst* funcCall = CallInst::Create (func, inst.getOperand (0 ), inst. getName (), &inst );
1552+ CallInst* funcCall = CallInst::Create (func, inst.getOperand (0 ));
15571553 addCallInst (funcCall);
1558- funcCall->setDebugLoc (inst.getDebugLoc ());
1559-
1560- inst.replaceAllUsesWith (funcCall);
1561- inst.eraseFromParent ();
1554+ ReplaceInstWithInst (&inst, funcCall);
15621555
15631556 m_libModuleToBeImported[LIBMOD_INT_DIV_REM] = true ;
15641557 m_changed = true ;
@@ -2307,11 +2300,8 @@ As a result, we reduce 2x necessary work
23072300 m_CallRemDiv.push_back (&I);
23082301 }
23092302
2310- if (!isDPEmu () && !isDPDivSqrtEmu ()) {
2311- return ;
2312- }
2313-
2314- if (resTy->isDoubleTy () &&
2303+ if ((isDPEmu () || isDPDivSqrtEmu ()) &&
2304+ resTy->isDoubleTy () &&
23152305 (II && II->getIntrinsicID () == Intrinsic::sqrt))
23162306 {
23172307 FunctionIDs sqrtType = FUNCTION_DP_SQRT;
@@ -2346,12 +2336,8 @@ As a result, we reduce 2x necessary work
23462336 return ;
23472337 }
23482338
2349- if (!isDPEmu ()) {
2350- return ;
2351- }
2352-
23532339 // llvm.fma.f64
2354- if (resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fma)
2340+ if (isDPEmu () && resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fma)
23552341 {
23562342 Function* newFunc = getOrCreateFunction (FUNCTION_DP_FMA);
23572343 Function* CurrFunc = I.getParent ()->getParent ();
@@ -2378,7 +2364,7 @@ As a result, we reduce 2x necessary work
23782364 // llvm.fma.rtn.f64
23792365 // llvm.fma.rtp.f64
23802366 // llvm.fma.rtz.f64
2381- if (resTy->isDoubleTy () && GII &&
2367+ if (isDPEmu () && resTy->isDoubleTy () && GII &&
23822368 (GII->getIntrinsicID () == GenISAIntrinsic::GenISA_fma_rtn ||
23832369 GII->getIntrinsicID () == GenISAIntrinsic::GenISA_fma_rtp ||
23842370 GII->getIntrinsicID () == GenISAIntrinsic::GenISA_fma_rtz))
@@ -2421,8 +2407,57 @@ As a result, we reduce 2x necessary work
24212407 return ;
24222408 }
24232409
2410+ if ((isRTEFP64toFP16 () || isDPEmu () || isDPConvEmu ()) &&
2411+ GII &&
2412+ (GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rte ||
2413+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtz ||
2414+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtn ||
2415+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtp) &&
2416+ resTy->isHalfTy () && GII->getOperand (0 )->getType ()->isDoubleTy ())
2417+ {
2418+ if (isRTEFP64toFP16 () && !isDPEmu () && !isDPConvEmu () && GII->getIntrinsicID () != GenISAIntrinsic::GenISA_ftof_rte)
2419+ {
2420+ // It is safe to use a sequence of conversions for RTZ, RTN and RTP (fp64 -> fp32 -> fp16)
2421+ return ;
2422+ }
2423+ const StringRef funcName =
2424+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rte ?
2425+ " __precompiled_convert_f64_to_f16_rte" :
2426+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtz ?
2427+ " __precompiled_convert_f64_to_f16_rtz" :
2428+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtn ?
2429+ " __precompiled_convert_f64_to_f16_rtn" :
2430+ " __precompiled_convert_f64_to_f16_rtp" ;
2431+ Function* func = m_pModule->getFunction (funcName);
2432+
2433+ // Try to look up the function in the module's symbol
2434+ // table first, else add it.
2435+ if (func == NULL )
2436+ {
2437+ FunctionType* FuncIntrType = FunctionType::get (
2438+ resTy,
2439+ I.getOperand (0 )->getType (),
2440+ false );
2441+
2442+ func = Function::Create (
2443+ FuncIntrType,
2444+ GlobalValue::ExternalLinkage,
2445+ funcName,
2446+ m_pModule);
2447+ }
2448+
2449+ CallInst* funcCall = CallInst::Create (func, GII->getOperand (0 ));
2450+ ReplaceInstWithInst (GII, funcCall);
2451+ addCallInst (funcCall);
2452+
2453+ m_libModuleToBeImported[LIBMOD_INT_DIV_REM] = true ;
2454+ m_changed = true ;
2455+ m_pCtx->metrics .StatEndEmuFunc (funcCall);
2456+ return ;
2457+ }
2458+
24242459 // llvm.fabs.f64
2425- if (resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fabs)
2460+ if (isDPEmu () && resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fabs)
24262461 {
24272462 // bit 63 is sign bit, set it to zero. Don't use int64.
24282463 VectorType* vec2Ty = IGCLLVM::FixedVectorType::get (intTy, 2 );
0 commit comments