@@ -192,7 +192,7 @@ void CHullShader::AllocateEightPatchPayload()
192192
193193 assert (offset % getGRFSize () == 0 );
194194 ProgramOutput ()->m_startReg = offset / getGRFSize ();
195-
195+
196196 // allocate space for NOS constants and pushed constants
197197 AllocateConstants3DShader (offset);;
198198
@@ -214,7 +214,7 @@ void CHullShader::AllocateSinglePatchPayload()
214214 uint offset = 0 ;
215215
216216 // R0 is always allocated as a predefined variable. Increase offset for R0
217- assert (m_R0);
217+ assert (m_R0);
218218 offset += getGRFSize ();
219219
220220 // if m_pURBReadHandlesReg != nullptr, then we need to allocate ( (m_pOutputControlPointCount - 1)/8 + 1 ) registers for input handles
@@ -303,7 +303,7 @@ CVariable* CHullShader::GetURBReadHandlesReg()
303303 {
304304 m_pURBReadHandlesReg = GetNewVariable (
305305 numLanes (m_SIMDSize) * ( m_pNumURBReadHandleGRF ),
306- ISA_TYPE_UD,
306+ ISA_TYPE_UD,
307307 EALIGN_GRF);
308308 }
309309 return m_pURBReadHandlesReg;
@@ -362,18 +362,18 @@ CVariable* CHullShader::GetURBInputHandle(CVariable* pVertexIndex)
362362 }
363363}
364364
365- QuadEltUnit CHullShader::GetFinalGlobalOffet (QuadEltUnit globalOffset)
366- {
365+ QuadEltUnit CHullShader::GetFinalGlobalOffet (QuadEltUnit globalOffset)
366+ {
367367 return globalOffset;
368368}
369369
370370uint32_t CHullShader::GetMaxNumOfPushedInputs () const
371- {
371+ {
372372 uint numberOfPatches = (m_properties.m_pShaderDispatchMode == EIGHT_PATCH_DISPATCH_MODE) ? 8 : 1 ;
373373
374- // Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
374+ // Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
375375 // in current dispatch mode for current topology ( InputPatch size ).
376- uint32_t maxNumOfPushedInputAttributesPerICP =
376+ uint32_t maxNumOfPushedInputAttributesPerICP =
377377 m_pMaxNumOfPushedInputs / (m_properties.m_pInputControlPointCount *numberOfPatches);
378378
379379 // Input attributes can be pushed only in pairs, so we need to round down the limit.
@@ -383,28 +383,28 @@ uint32_t CHullShader::GetMaxNumOfPushedInputs() const
383383 // They can be pushed only in pairs.
384384 uint32_t reqNumOfInputAttributesPerICP = iSTD::Align (m_properties.m_pMaxInputSignatureCount , 2 );
385385
386- // TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
386+ // TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
387387 // in case SGV inputs have to be taken into consideration (will be done in next step).
388388 // reqNumOfInputAttributes += HeaderSize().Count();
389389
390390 // Determine ACTUAL number of attributes that can be pushed.
391391 // If the required number of input attributes is less than the maximum potential number,
392392 // then all of them will be pushed.
393- uint32_t actualNumOfPushedInputAttributesPerICP =
393+ uint32_t actualNumOfPushedInputAttributesPerICP =
394394 iSTD::Min (reqNumOfInputAttributesPerICP, maxNumOfPushedInputAttributesPerICP);
395395
396396 return actualNumOfPushedInputAttributesPerICP;
397397}
398398
399- void CHullShader::EmitPatchConstantInput (llvm::Instruction* pInst, CVariable* pDest )
399+ void CHullShader::EmitPatchConstantInput (llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar )
400400{
401401 bool readHeader = ((dyn_cast<GenIntrinsicInst>(pInst))->getIntrinsicID () == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead);
402402
403403 // patch constant input read
404404 llvm::Value* pIndirectVertexIdx = pInst->getOperand (0 );
405405
406- CVariable* pPerSlotOffsetVar = nullptr ;
407- QuadEltUnit attributeOffset (0 );
406+ pPerSlotOffsetVar = nullptr ;
407+ attributeOffset = QuadEltUnit (0 );
408408
409409 // {BDW - WA, HS} Do not set pPerSlotOffset or change globalOffset to read TessFactors from URB.
410410 if (!readHeader)
@@ -422,25 +422,23 @@ void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pD
422422
423423 attributeOffset = attributeOffset + GetURBHeaderSize ();
424424 }
425-
426- URBReadPatchConstOrOutputCntrlPtInput (pPerSlotOffsetVar, attributeOffset, false , pDest);
427425}
428426
429- void CHullShader::EmitOutputControlPointInput (llvm::Instruction* pInst, CVariable* pDest )
427+ void CHullShader::EmitOutputControlPointInput (llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar )
430428{
431429 // output control point input read
432430 llvm::Value* pIndirectVertexIdx = pInst->getOperand (0 );
433431 llvm::Value* pAttribIdx = pInst->getOperand (1 );
434432
435- CVariable* pPerSlotOffsetVar = nullptr ;
436- QuadEltUnit attributeOffset ( GetPatchConstantOutputSize () );
433+ pPerSlotOffsetVar = nullptr ;
434+ attributeOffset = GetPatchConstantOutputSize ();
437435
438436 // Compute offset from vertex index
439437 if (llvm::ConstantInt* pConstVertexIdx = llvm::dyn_cast<llvm::ConstantInt>(pIndirectVertexIdx))
440438 {
441439 // vertex index is a constant, we can compute the URB read offset directly
442- attributeOffset =
443- attributeOffset +
440+ attributeOffset =
441+ attributeOffset +
444442 QuadEltUnit (int_cast<unsigned int >(pConstVertexIdx->getZExtValue ())) * m_properties.m_pMaxOutputSignatureCount ;
445443 }
446444 else
@@ -459,7 +457,7 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
459457 }
460458 }
461459
462- // Compute additionall offset coming from atribute index
460+ // Compute additional offset coming from attribute index
463461 if (llvm::ConstantInt* pConstAttribIdx = llvm::dyn_cast<llvm::ConstantInt>(pAttribIdx))
464462 {
465463 // attribute offset is a constant, we can compute the URB read offset directly
@@ -482,53 +480,10 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
482480 pPerSlotOffsetVar = GetSymbol (pAttribIdx);
483481 }
484482 }
485-
486- URBReadPatchConstOrOutputCntrlPtInput (pPerSlotOffsetVar, attributeOffset, false , pDest);
487- }
488-
489- void CHullShader::URBReadPatchConstOrOutputCntrlPtInput (
490- CVariable* pPerSlotOffsetVar,
491- QuadEltUnit globalOffset,
492- bool EOT,
493- CVariable* pDest )
494- {
495- CEncoder& encoder = GetEncoder ();
496-
497- const bool hasPerSlotOffsets = pPerSlotOffsetVar != nullptr ;
498- // Payload size is just URB handles (1 GRF) or URB handles and per-slot offsets (2 GRFs).
499- const Unit<Element> payloadSize (hasPerSlotOffsets ? 2 : 1 );
500- CVariable* pPayload =
501- GetNewVariable (payloadSize.Count () * numLanes (m_SIMDSize), ISA_TYPE_UD, EALIGN_GRF);
502-
503- // get the register with URBHandles
504- CopyVariable (pPayload, m_pURBWriteHandleReg);
505-
506- // If we have runtime value in per-slot offsets, we need to copy per-slot offsets to payload
507- if (hasPerSlotOffsets)
508- {
509- CopyVariable (pPayload, pPerSlotOffsetVar, 1 );
510- }
511-
512- const Unit<Element> messageLength = payloadSize;
513- const Unit<Element> responseLength (pDest->GetNumberElement ()/numLanes (m_SIMDSize));
514- const uint desc = UrbMessage (
515- messageLength.Count (),
516- responseLength.Count (),
517- EOT,
518- hasPerSlotOffsets,
519- false ,
520- globalOffset.Count (),
521- EU_GEN8_URB_OPCODE_SIMD8_READ);
522-
523- const uint exDesc = EU_MESSAGE_TARGET_URB | (EOT ? 1 << 5 : 0 );
524- CVariable* pMessDesc = ImmToVariable (desc, ISA_TYPE_UD);
525-
526- encoder.Send (pDest, pPayload, exDesc, pMessDesc);
527- encoder.Push ();
528483}
529484
530485// / Returns the size of the output vertex.
531- // / Unit: 16B = 4 DWORDs
486+ // / Unit: 16B = 4 DWORDs
532487// / Note: The PatchConstantOutput size must be 32B-aligned when rendering is enabled
533488// / Therefore, the PatchConstantOutput size is also rounded up to a multiple of 2.
534489QuadEltUnit CHullShader::GetPatchConstantOutputSize () const
0 commit comments