@@ -2738,37 +2738,46 @@ namespace IGC
27382738 V (vKernel->AppendVISACFRetInst (predOpnd, vISA_EMASK_M1, EXEC_SIZE_1));
27392739 }
27402740
2741- void CEncoder::SetFloatDenormMode (VISAKernel* vKernel, Float_DenormMode mode16,
2742- Float_DenormMode mode32, Float_DenormMode mode64 )
2741+ // Initialize control register CR0 with the float denorm modes and the float rounding mode.
2742+ void CEncoder::initCR (VISAKernel* vKernel )
27432743 {
2744- VISA_VectorOpnd* src0_Opnd = nullptr ;
2745- VISA_VectorOpnd* src1_Opnd = nullptr ;
2746- VISA_VectorOpnd* dst_Opnd = nullptr ;
2747- VISA_GenVar* cr0_var = nullptr ;
2748- uint imm_data = 0 ;
2749- if (mode16 == FLOAT_DENORM_RETAIN)
2750- imm_data |= 0x400 ;
2751- if (mode32 == FLOAT_DENORM_RETAIN)
2752- imm_data |= 0x80 ;
2753- if (mode64 == FLOAT_DENORM_RETAIN)
2754- imm_data |= 0x40 ;
2755- // If we are in the default mode no need to set the CR
2744+ // The CR0 bits written here are assumed to be zero on entry to the kernel/shader,
2745+ // because the values are only ORed in. (If not, this function needs to be changed accordingly.)
2746+ VISA_VectorOpnd* src0_Opnd = nullptr ;
2747+ VISA_VectorOpnd* src1_Opnd = nullptr ;
2748+ VISA_VectorOpnd* dst_Opnd = nullptr ;
2749+ VISA_GenVar* cr0_var = nullptr ;
2750+ uint imm_data = 0 ;
2751+
2752+ CodeGenContext* pCtx = m_program->GetContext ();
2753+ if (pCtx->m_floatDenormMode16 == FLOAT_DENORM_RETAIN)
2754+ imm_data |= 0x400 ;
2755+ if (pCtx->m_floatDenormMode32 == FLOAT_DENORM_RETAIN)
2756+ imm_data |= 0x80 ;
2757+ if (pCtx->m_floatDenormMode64 == FLOAT_DENORM_RETAIN)
2758+ imm_data |= 0x40 ;
2759+
2760+ uint RM_bits = getEncoderRoundingMode (
2761+ static_cast <Float_RoundingMode>(pCtx->getModuleMetaData ()->compOpt .FloatRoundingMode ));
2762+ imm_data |= RM_bits;
2763+
2764+ // Nothing needs to be ORed in when imm_data is zero (default modes), so skip writing CR0
27562765 if (imm_data != 0 )
2757- {
2758- V (vKernel->GetPredefinedVar (cr0_var, PREDEFINED_CR0));
2759- V (vKernel->CreateVISASrcOperand (src0_Opnd, cr0_var, MODIFIER_NONE, 0 , 1 , 0 , 0 , 0 ));
2760- V (vKernel->CreateVISAImmediate (src1_Opnd, &imm_data, ISA_TYPE_UD));
2761- V (vKernel->CreateVISADstOperand (dst_Opnd, cr0_var, 1 , 0 , 0 ));
2762- V (vKernel->AppendVISAArithmeticInst (
2763- ISA_OR,
2764- nullptr ,
2765- false ,
2766- vISA_EMASK_M1_NM,
2767- EXEC_SIZE_1,
2768- dst_Opnd,
2769- src0_Opnd,
2770- src1_Opnd));
2771- }
2766+ {
2767+ V (vKernel->GetPredefinedVar (cr0_var, PREDEFINED_CR0));
2768+ V (vKernel->CreateVISASrcOperand (src0_Opnd, cr0_var, MODIFIER_NONE, 0 , 1 , 0 , 0 , 0 ));
2769+ V (vKernel->CreateVISAImmediate (src1_Opnd, &imm_data, ISA_TYPE_UD));
2770+ V (vKernel->CreateVISADstOperand (dst_Opnd, cr0_var, 1 , 0 , 0 ));
2771+ V (vKernel->AppendVISAArithmeticInst (
2772+ ISA_OR,
2773+ nullptr ,
2774+ false ,
2775+ vISA_EMASK_M1_NM,
2776+ EXEC_SIZE_1,
2777+ dst_Opnd,
2778+ src0_Opnd,
2779+ src1_Opnd));
2780+ }
27722781 }
27732782
27742783 void CEncoder::SetVectorMask (bool VMask)
@@ -4192,126 +4201,119 @@ namespace IGC
41924201
41934202 void CEncoder::InitEncoder (bool canAbortOnSpill, bool hasStackCall)
41944203 {
4195- m_aliasesMap.clear ();
4196- m_encoderState.m_SubSpanDestination = false ;
4197- CodeGenContext* context = m_program->GetContext ();
4198- m_encoderState.m_secondHalf = false ;
4199- m_enableVISAdump = false ;
4200- labelMap.clear ();
4201- labelMap.resize (m_program->entry ->size (), nullptr );
4202- labelCounter = 0 ;
4203- m_hasInlineAsm = context->m_DriverInfo .SupportInlineAssembly () && context->m_instrTypes .hasInlineAsm ;
4204-
4205- vbuilder = nullptr ;
4206- vAsmTextBuilder = nullptr ;
4207- TARGET_PLATFORM VISAPlatform = GetVISAPlatform (&(context->platform ));
4204+ m_aliasesMap.clear ();
4205+ m_encoderState.m_SubSpanDestination = false ;
4206+ CodeGenContext* context = m_program->GetContext ();
4207+ m_encoderState.m_secondHalf = false ;
4208+ m_enableVISAdump = false ;
4209+ labelMap.clear ();
4210+ labelMap.resize (m_program->entry ->size (), nullptr );
4211+ labelCounter = 0 ;
4212+ m_hasInlineAsm = context->m_DriverInfo .SupportInlineAssembly () && context->m_instrTypes .hasInlineAsm ;
4213+
4214+ vbuilder = nullptr ;
4215+ vAsmTextBuilder = nullptr ;
4216+ TARGET_PLATFORM VISAPlatform = GetVISAPlatform (&(context->platform ));
42084217
4209- SetVISAWaTable (m_program->m_Platform ->getWATable ());
4218+ SetVISAWaTable (m_program->m_Platform ->getWATable ());
42104219
4211- llvm::SmallVector<const char *, 10 > params;
4212- if (!m_hasInlineAsm)
4213- {
4214- // Asm text writer mode doesnt need dump params
4215- InitBuildParams (params);
4216- }
4220+ llvm::SmallVector<const char *, 10 > params;
4221+ if (!m_hasInlineAsm)
4222+ {
4223+ // Asm text writer mode does not need dump params
4224+ InitBuildParams (params);
4225+ }
42174226
4218- COMPILER_TIME_START (m_program->GetContext (), TIME_CG_vISACompile);
4219- bool enableVISADump = IGC_IS_FLAG_ENABLED (EnableVISASlowpath) || IGC_IS_FLAG_ENABLED (ShaderDumpEnable);
4220- auto builderMode = m_hasInlineAsm ? vISA_ASM_WRITER : vISA_3D;
4221- auto builderOpt = (enableVISADump || m_hasInlineAsm) ? CM_CISA_BUILDER_BOTH : CM_CISA_BUILDER_GEN;
4222- V (CreateVISABuilder (vbuilder, builderMode, builderOpt, VISAPlatform, params.size (), params.data (), &m_WaTable));
4227+ COMPILER_TIME_START (m_program->GetContext (), TIME_CG_vISACompile);
4228+ bool enableVISADump = IGC_IS_FLAG_ENABLED (EnableVISASlowpath) || IGC_IS_FLAG_ENABLED (ShaderDumpEnable);
4229+ auto builderMode = m_hasInlineAsm ? vISA_ASM_WRITER : vISA_3D;
4230+ auto builderOpt = (enableVISADump || m_hasInlineAsm) ? CM_CISA_BUILDER_BOTH : CM_CISA_BUILDER_GEN;
4231+ V (CreateVISABuilder (vbuilder, builderMode, builderOpt, VISAPlatform, params.size (), params.data (), &m_WaTable));
42234232
4224- InitVISABuilderOptions (VISAPlatform, canAbortOnSpill, hasStackCall);
4233+ InitVISABuilderOptions (VISAPlatform, canAbortOnSpill, hasStackCall);
42254234
4226- // Pass all build options to builder
4227- SetBuilderOptions (vbuilder);
4235+ // Pass all build options to the builder
4236+ SetBuilderOptions (vbuilder);
42284237
4229- vKernel = nullptr ;
4238+ vKernel = nullptr ;
42304239
4231- std::string kernelName = m_program->entry ->getName ();
4232- if (context->m_instrTypes .hasDebugInfo )
4233- {
4234- // This metadata node is added by TransformBlocks pass for device side
4235- // enqueue feature of OCL2.0+.
4236- // The problem is that for device side enqueue, kernel name used in
4237- // IGC differs the one used to create JIT kernel. This leads to different
4238- // kernel names in .elf file and .dbg file. So dbgmerge tool cannot
4239- // merge the two together. With this metadata node we create a mapping
4240- // between the two names and when debug info is enabled, make JIT use
4241- // same name as IGC.
4242- // Names earlier -
4243- // ParentKernel_dispatch_0 in dbg and
4244- // __ParentKernel_block_invoke in elf
4245- // when kernel name is ParentKernel
4246- //
4247- auto md = m_program->entry ->getParent ()->getNamedMetadata (" igc.device.enqueue" );
4248- if (md)
4240+ std::string kernelName = m_program->entry ->getName ();
4241+ if (context->m_instrTypes .hasDebugInfo )
42494242 {
4250- for (unsigned int i = 0 ; i < md->getNumOperands (); i++)
4243+ // This metadata node is added by the TransformBlocks pass for the device-side
4244+ // enqueue feature of OCL2.0+.
4245+ // The problem is that for device-side enqueue, the kernel name used in
4246+ // IGC differs from the one used to create the JIT kernel. This leads to different
4247+ // kernel names in the .elf file and the .dbg file, so the dbgmerge tool cannot
4248+ // merge the two together. With this metadata node we create a mapping
4249+ // between the two names and, when debug info is enabled, make JIT use
4250+ // the same name as IGC.
4251+ // Names earlier -
4252+ // ParentKernel_dispatch_0 in dbg and
4253+ // __ParentKernel_block_invoke in elf
4254+ // when kernel name is ParentKernel
4255+ //
4256+ auto md = m_program->entry ->getParent ()->getNamedMetadata (" igc.device.enqueue" );
4257+ if (md)
42514258 {
4252- auto mdOpnd = md->getOperand (i);
4253- auto first = dyn_cast_or_null<MDString>(mdOpnd->getOperand (1 ));
4254- if (first &&
4255- first->getString ().equals (kernelName))
4259+ for (unsigned int i = 0 ; i < md->getNumOperands (); i++)
42564260 {
4257- auto second = dyn_cast_or_null<MDString>(mdOpnd->getOperand (0 ));
4258- if (second)
4261+ auto mdOpnd = md->getOperand (i);
4262+ auto first = dyn_cast_or_null<MDString>(mdOpnd->getOperand (1 ));
4263+ if (first &&
4264+ first->getString ().equals (kernelName))
42594265 {
4260- kernelName = second->getString ();
4266+ auto second = dyn_cast_or_null<MDString>(mdOpnd->getOperand (0 ));
4267+ if (second)
4268+ {
4269+ kernelName = second->getString ();
4270+ }
42614271 }
42624272 }
42634273 }
42644274 }
4265- }
42664275
4267- std::string asmName;
4268- if (m_enableVISAdump || context->m_instrTypes .hasDebugInfo )
4269- {
4270- // vISA does not support string of length >= 255. Truncate if this exceeds
4271- // the limit. Note that vISA may append an extension, so relax it to a
4272- // random number 240 here.
4273- const int MAX_VISA_STRING_LENGTH = 240 ;
4274- if (kernelName.size () >= MAX_VISA_STRING_LENGTH)
4276+ std::string asmName;
4277+ if (m_enableVISAdump || context->m_instrTypes .hasDebugInfo )
42754278 {
4276- kernelName.resize (MAX_VISA_STRING_LENGTH);
4279+ // vISA does not support strings of length >= 255. Truncate the kernel name if it
4280+ // exceeds the limit. Note that vISA may append an extension, so a conservative,
4281+ // somewhat arbitrary limit of 240 is used here.
4282+ const int MAX_VISA_STRING_LENGTH = 240 ;
4283+ if (kernelName.size () >= MAX_VISA_STRING_LENGTH)
4284+ {
4285+ kernelName.resize (MAX_VISA_STRING_LENGTH);
4286+ }
4287+ asmName = GetDumpFileName (" asm" );
4288+ }
4289+ else
4290+ {
4291+ kernelName = " kernel" ;
4292+ asmName = " kernel.asm" ;
42774293 }
4278- asmName = GetDumpFileName (" asm" );
4279- }
4280- else
4281- {
4282- kernelName = " kernel" ;
4283- asmName = " kernel.asm" ;
4284- }
42854294
4286- V (vbuilder->AddKernel (vKernel, kernelName.c_str ()));
4287- V (vKernel->AddKernelAttribute (" OutputAsmPath" , asmName.length (), asmName.c_str ()));
4295+ V (vbuilder->AddKernel (vKernel, kernelName.c_str ()));
4296+ V (vKernel->AddKernelAttribute (" OutputAsmPath" , asmName.length (), asmName.c_str ()));
42884297
4289- vMainKernel = vKernel;
4298+ vMainKernel = vKernel;
42904299
4291- auto gtpin_init = context->gtpin_init ;
4292- if (gtpin_init)
4293- {
4294- vKernel->SetGTPinInit (gtpin_init);
4295- }
4300+ auto gtpin_init = context->gtpin_init ;
4301+ if (gtpin_init)
4302+ {
4303+ vKernel->SetGTPinInit (gtpin_init);
4304+ }
42964305
4297- // Right now only 1 main function in the kernel
4306+ // Right now there is only one main function in the kernel
42984307 VISA_LabelOpnd* functionLabel = nullptr ;
4299- V (vKernel->CreateVISALabelVar (functionLabel, " main" , LABEL_SUBROUTINE));
4300- V (vKernel->AppendVISACFLabelInst (functionLabel));
4301-
4302- V (vKernel->CreateVISASurfaceVar (dummySurface, " " , 1 ));
4308+ V (vKernel->CreateVISALabelVar (functionLabel, " main" , LABEL_SUBROUTINE));
4309+ V (vKernel->AppendVISACFLabelInst (functionLabel));
43034310
4304- V (vKernel->CreateVISASamplerVar (samplervar , " " , 1 ));
4311+ V (vKernel->CreateVISASurfaceVar (dummySurface , " " , 1 ));
43054312
4306- CEncoder::SetFloatDenormMode (vKernel, context->m_floatDenormMode16 ,
4307- context->m_floatDenormMode32 ,
4308- context->m_floatDenormMode64 );
4313+ V (vKernel->CreateVISASamplerVar (samplervar, " " , 1 ));
43094314
4310- // The instruction is generated only if mode != FLOAT_ROUND_TO_NEAREST_EVEN
4311- CEncoder::SetFloatRoundingMode (
4312- getEncoderRoundingMode (FLOAT_ROUND_TO_NEAREST_EVEN),
4313- getEncoderRoundingMode (static_cast <Float_RoundingMode>(
4314- context->getModuleMetaData ()->compOpt .FloatRoundingMode )));
4315+ // Initialize the control register with the default float denorm modes and rounding mode
4316+ initCR (vKernel);
43154317 }
43164318
43174319 void CEncoder::SetKernelStackPointer64 ()
0 commit comments