@@ -7780,7 +7780,7 @@ int IR_Builder::translateVISARTWrite3DInst(
77807780 " R,G,B,A for RT write must have the same type" );
77817781 }
77827782
7783- auto mult = (execSize == 8 ) ? 1 : 2 ;
7783+ auto mult = (execSize == getNativeExecSize () ? 1 : 2 ) ;
77847784 mult = (FP16Data)? 1 : mult;
77857785
77867786 // RGBA sr0Alpha take up one GRF in SIMD8 and SIMD16 modes.
@@ -7795,29 +7795,23 @@ int IR_Builder::translateVISARTWrite3DInst(
77957795 ++numRows;
77967796 }
77977797
7798- if ( cntrls.oMPresent && mult == 2 )
7798+ if ( cntrls.oMPresent && mult == 2 )
77997799 {
78007800 // oM is always 1 row irrespective of execSize
78017801 numRows--;
78027802 }
78037803
78047804 // although for now HW only supports stencil in SIMD8 mode
7805- if ( cntrls.isStencil && mult == 2 )
7805+ if (cntrls.isStencil && mult == 2 )
78067806 {
78077807 // stencil is always 1 row irrespective of execSize
78087808 numRows--;
78097809 }
78107810
7811- #define HEADER_OFFSET GENX_GRF_REG_SIZ * 2
7812- #define HEADER_SIZE GENX_SAMPLER_IO_SZ * 2
7813- #define RT_HEADER_SIZE 2
7814- uint8_t headerSizeInDwords = HEADER_SIZE;
7815- if (emask == vISA_EMASK_M5_NM || emask == vISA_EMASK_M5)
7816- {
7817- // For SIMD32 case when RT Write is split in to two SIMD16
7818- // header information is expected in R0/R2 registers
7819- headerSizeInDwords += GENX_SAMPLER_IO_SZ;
7820- }
7811+ // header is always 64 bytes
7812+ const int numDWInHeader = 16 ;
7813+ const int headerBytes = numDWInHeader * sizeof (int );
7814+ const int numHeaderGRF = numDWInHeader / getNativeExecSize ();
78217815
78227816 /*
78237817 All other values should be set by default.
@@ -7830,7 +7824,7 @@ int IR_Builder::translateVISARTWrite3DInst(
78307824 if (needsHeaderForMRT || cntrls.isSampleIndex )
78317825 {
78327826 useHeader = true ;
7833- numRows += RT_HEADER_SIZE ;
7827+ numRows += numHeaderGRF ;
78347828 }
78357829
78367830 bool useSplitSend = useSends ();
@@ -7850,10 +7844,10 @@ int IR_Builder::translateVISARTWrite3DInst(
78507844 if (useHeader)
78517845 {
78527846 // subtracting Header
7853- numRows -= RT_HEADER_SIZE ;
7847+ numRows -= numHeaderGRF ;
78547848 // creating header
7855- msg = createSendPayloadDcl (GENX_SAMPLER_IO_SZ * RT_HEADER_SIZE , Type_UD);
7856- msgF = createSendPayloadDcl (GENX_SAMPLER_IO_SZ * RT_HEADER_SIZE , Type_F);
7849+ msg = createSendPayloadDcl (numDWInHeader , Type_UD);
7850+ msgF = createSendPayloadDcl (numDWInHeader , Type_F);
78577851 msgF->setAliasDeclare (msg, 0 );
78587852 }
78597853 // creating payload
@@ -7876,19 +7870,19 @@ int IR_Builder::translateVISARTWrite3DInst(
78767870 msgF->setAliasDeclare (msg, 0 );
78777871
78787872 // creating payload declarations.
7879- payloadUD = createSendPayloadDcl (numElts - (useHeader ? HEADER_SIZE : 0 ), Type_UD);
7880- payloadFOrHF = createSendPayloadDcl (numElts - (useHeader ? HEADER_SIZE : 0 ), FP16Data ? Type_HF : Type_F);
7881- payloadUW = createSendPayloadDcl (numElts - (useHeader ? HEADER_SIZE : 0 ), Type_UW);
7873+ payloadUD = createSendPayloadDcl (numElts - (useHeader ? numDWInHeader : 0 ), Type_UD);
7874+ payloadFOrHF = createSendPayloadDcl (numElts - (useHeader ? numDWInHeader : 0 ), FP16Data ? Type_HF : Type_F);
7875+ payloadUW = createSendPayloadDcl (numElts - (useHeader ? numDWInHeader : 0 ), Type_UW);
78827876 payloadF = createSendPayloadDcl (numElts, Type_F);
78837877
78847878 // setting them to alias a top level decl with offset past the header
7885- payloadUD->setAliasDeclare (msg, useHeader ? HEADER_OFFSET : 0 );
7886- payloadFOrHF->setAliasDeclare (msg, useHeader ? HEADER_OFFSET : 0 );
7887- payloadUW->setAliasDeclare (msg, useHeader ? HEADER_OFFSET : 0 );
7879+ payloadUD->setAliasDeclare (msg, useHeader ? headerBytes : 0 );
7880+ payloadFOrHF->setAliasDeclare (msg, useHeader ? headerBytes : 0 );
7881+ payloadUW->setAliasDeclare (msg, useHeader ? headerBytes : 0 );
78887882 payloadF->setAliasDeclare (payloadUD, 0 );
78897883 }
78907884
7891- if ( useHeader )
7885+ if ( useHeader)
78927886 {
78937887 ASSERT_USER (r1HeaderOpnd, " Second GRF for header that was passed in is NULL." );
78947888 G4_DstRegRegion* payloadRegRgn = createDst (msg->getRegVar (), 0 , 0 , 1 , Type_UD);
@@ -7908,14 +7902,14 @@ int IR_Builder::translateVISARTWrite3DInst(
79087902#define SAMPLE_INDEX_OFFSET 6
79097903 if (cntrls.isSampleIndex )
79107904 {
7911- G4_Declare * tmpDcl = createTempVar (2 , Type_UD, Any);
7912- G4_DstRegRegion * tmpDst = createDst (tmpDcl->getRegVar (), 0 , 0 , 1 , Type_UD);
7905+ G4_Declare* tmpDcl = createTempVar (2 , Type_UD, Any);
7906+ G4_DstRegRegion* tmpDst = createDst (tmpDcl->getRegVar (), 0 , 0 , 1 , Type_UD);
79137907
79147908 createBinOp (G4_shl, 1 , tmpDst, sampleIndexOpnd, createImm (SAMPLE_INDEX_OFFSET, Type_UD), InstOpt_WriteEnable, true );
79157909
79167910 G4_DstRegRegion* payloadUDRegRgn = createDst (msg->getRegVar (), 0 , 0 , 1 , Type_UD);
7917- G4_SrcRegRegion * tmpSrc = createSrcRegRegion (Mod_src_undef, Direct, tmpDcl->getRegVar (), 0 , 0 , getRegionScalar (), Type_UD);
7918- G4_SrcRegRegion * payloadSrc = createSrcRegRegion (Mod_src_undef, Direct, msg->getRegVar (), 0 , 0 , getRegionScalar (), Type_UD);
7911+ G4_SrcRegRegion* tmpSrc = createSrcRegRegion (Mod_src_undef, Direct, tmpDcl->getRegVar (), 0 , 0 , getRegionScalar (), Type_UD);
7912+ G4_SrcRegRegion* payloadSrc = createSrcRegRegion (Mod_src_undef, Direct, msg->getRegVar (), 0 , 0 , getRegionScalar (), Type_UD);
79197913 createBinOp (G4_or, 1 , payloadUDRegRgn, payloadSrc, tmpSrc, InstOpt_WriteEnable, true );
79207914 }
79217915
@@ -7924,12 +7918,12 @@ int IR_Builder::translateVISARTWrite3DInst(
79247918 G4_DstRegRegion* dstRTIRgn = createDst (msg->getRegVar (), 0 , 2 , 1 , Type_UD);
79257919
79267920 G4_INST* rtiMovInst = createMov (1 , dstRTIRgn, rtIndex, InstOpt_NoOpt, true );
7927- rtiMovInst->setOptionOn ( InstOpt_WriteEnable );
7921+ rtiMovInst->setOptionOn (InstOpt_WriteEnable);
79287922 }
79297923
79307924 // if header is used, then predication value will need to be stored
79317925 // in the header
7932- if (useHeader && (pred || cntrls.isHeaderMaskfromCe0 ))
7926+ if (useHeader && (pred || cntrls.isHeaderMaskfromCe0 ))
79337927 {
79347928 // moving pixelMask in to payload
79357929 G4_DstRegRegion* dstPixelMaskRgn = createDst (
@@ -7952,20 +7946,20 @@ int IR_Builder::translateVISARTWrite3DInst(
79527946 // M0 : WAce0.0; M16 : WAce0.1
79537947 createMov (1 , flag, createImm (0 , Type_UW), InstOpt_WriteEnable, true );
79547948
7955- G4_SrcRegRegion * r0_0 = createSrcRegRegion (
7949+ G4_SrcRegRegion* r0_0 = createSrcRegRegion (
79567950 Mod_src_undef, Direct,
79577951 getRealR0 ()->getRegVar (), 0 , 0 ,
79587952 getRegionStride1 (), Type_UW);
7959- G4_SrcRegRegion * r0_1 = createSrcRegRegion (
7953+ G4_SrcRegRegion* r0_1 = createSrcRegRegion (
79607954 Mod_src_undef, Direct,
79617955 getRealR0 ()->getRegVar (), 0 , 0 ,
79627956 getRegionStride1 (), Type_UW);
7963- G4_DstRegRegion * nullDst = createNullDst (Type_UW);
7957+ G4_DstRegRegion* nullDst = createNullDst (Type_UW);
79647958 G4_CondMod* flagCM = createCondMod (Mod_e, flagVar, 0 );
79657959 createInst (NULL , G4_cmp, flagCM, false , 16 , nullDst,
79667960 r0_0, r0_1, Option);
79677961
7968- G4_SrcRegRegion * flagSrc = createSrcRegRegion (
7962+ G4_SrcRegRegion* flagSrc = createSrcRegRegion (
79697963 Mod_src_undef, Direct,
79707964 flagVar, 0 ,
79717965 Option == InstOpt_M16 ? 1 : 0 ,
@@ -7975,20 +7969,20 @@ int IR_Builder::translateVISARTWrite3DInst(
79757969 createMov (1 , dstPixelMaskRgn, flagSrc, InstOpt_WriteEnable, true );
79767970 };
79777971
7978- G4_SrcRegRegion * pixelMask = NULL ;
7972+ G4_SrcRegRegion* pixelMask = NULL ;
79797973 if (emask == vISA_EMASK_M5_NM || emask == vISA_EMASK_M5)
79807974 {
79817975 if (pred)
79827976 {
79837977 // this is a Second half of a SIMD32 RT write. We need to get second half of flag register.
79847978 // mov whole register in to GRF, move second word of it in to payload.
79857979
7986- G4_SrcRegRegion * pixelMaskTmp = createSrcRegRegion (
7987- Mod_src_undef, Direct,
7988- pred->getBase ()->asRegVar (), 0 , 0 ,
7989- getRegionScalar (), Type_UD);
7990- G4_Declare * tmpDcl = createTempVar (1 , Type_UD, Any);
7991- G4_DstRegRegion * tmpDst = createDst (tmpDcl->getRegVar (), 0 , 0 , 1 , Type_UD);
7980+ G4_SrcRegRegion* pixelMaskTmp = createSrcRegRegion (
7981+ Mod_src_undef, Direct,
7982+ pred->getBase ()->asRegVar (), 0 , 0 ,
7983+ getRegionScalar (), Type_UD);
7984+ G4_Declare* tmpDcl = createTempVar (1 , Type_UD, Any);
7985+ G4_DstRegRegion* tmpDst = createDst (tmpDcl->getRegVar (), 0 , 0 , 1 , Type_UD);
79927986 createMov (1 , tmpDst, pixelMaskTmp, InstOpt_WriteEnable, true );
79937987
79947988 pixelMask = createSrcRegRegion (Mod_src_undef, Direct,
@@ -8005,7 +7999,7 @@ int IR_Builder::translateVISARTWrite3DInst(
80057999 }
80068000 else
80078001 {
8008- G4_SrcRegRegion * ce0 = createSrcRegRegion (
8002+ G4_SrcRegRegion* ce0 = createSrcRegRegion (
80098003 Mod_src_undef, Direct,
80108004 phyregpool.getMask0Reg (), 0 , 0 ,
80118005 getRegionScalar (), Type_UD);
@@ -8035,7 +8029,7 @@ int IR_Builder::translateVISARTWrite3DInst(
80358029 }
80368030 else
80378031 {
8038- G4_SrcRegRegion * ce0 = createSrcRegRegion (
8032+ G4_SrcRegRegion* ce0 = createSrcRegRegion (
80398033 Mod_src_undef, Direct,
80408034 phyregpool.getMask0Reg (), 0 , 0 ,
80418035 getRegionScalar (), Type_UD);
@@ -8053,34 +8047,34 @@ int IR_Builder::translateVISARTWrite3DInst(
80538047
80548048 // setting first DWORD of MHC_RT_C0 - Render Target Message Header Control
80558049
8056- if ( cntrls.isStencil )
8050+ if ( cntrls.isStencil )
80578051 {
8058- orImmVal = ( 0x1 << 14 );
8052+ orImmVal = (0x1 << 14 );
80598053 }
80608054
8061- if ( cntrls.zPresent )
8055+ if ( cntrls.zPresent )
80628056 {
8063- orImmVal = ( 0x1 << 13 );
8057+ orImmVal = (0x1 << 13 );
80648058 }
80658059
8066- if ( cntrls.oMPresent )
8060+ if ( cntrls.oMPresent )
80678061 {
8068- orImmVal |= ( 0x1 << 12 );
8062+ orImmVal |= (0x1 << 12 );
80698063 }
80708064
8071- if ( cntrls.s0aPresent )
8065+ if ( cntrls.s0aPresent )
80728066 {
8073- orImmVal |= ( 0x1 << 11 );
8067+ orImmVal |= (0x1 << 11 );
80748068 }
80758069
8076- if ( orImmVal != 0 )
8070+ if ( orImmVal != 0 )
80778071 {
80788072 G4_SrcRegRegion* immSrcRegRgn = createSrcRegRegion (Mod_src_undef, Direct, msg->getRegVar (), 0 , 0 , getRegionScalar (), Type_UD);
80798073
80808074 G4_DstRegRegion* immDstRegRgn = createDst (msg->getRegVar (), 0 , 0 , 1 , Type_UD);
80818075
8082- G4_INST* immOrInst = createBinOp (G4_or, 1 , immDstRegRgn, immSrcRegRgn, createImm ( orImmVal, Type_UD ), InstOpt_WriteEnable, true );
8083- immOrInst->setOptionOn ( InstOpt_WriteEnable );
8076+ G4_INST* immOrInst = createBinOp (G4_or, 1 , immDstRegRgn, immSrcRegRgn, createImm (orImmVal, Type_UD), InstOpt_WriteEnable, true );
8077+ immOrInst->setOptionOn (InstOpt_WriteEnable);
80848078 }
80858079 }
80868080
@@ -8100,44 +8094,44 @@ int IR_Builder::translateVISARTWrite3DInst(
81008094 A->isNullReg ())
81018095 canCoalesce = false ;
81028096
8103- if ( canCoalesce && cntrls.s0aPresent )
8097+ if ( canCoalesce && cntrls.s0aPresent )
81048098 {
81058099 prevRawOpnd = s0a;
81068100 offset = getByteOffsetSrcRegion (s0a);
81078101 }
81088102
8109- if ( canCoalesce && cntrls.oMPresent )
8103+ if ( canCoalesce && cntrls.oMPresent )
81108104 {
81118105 // by default it will check based on first opnd type, but that can be HF, F, we need second operand type
81128106 // according to spec oM is UW
8113- canCoalesce = checkIfRegionsAreConsecutive ( prevRawOpnd, oM, execSize, oM->getType () );
8107+ canCoalesce = checkIfRegionsAreConsecutive (prevRawOpnd, oM, execSize, oM->getType ());
81148108 prevRawOpnd = oM;
8115- if ( offset == UNINITIALIZED_DWORD )
8109+ if ( offset == UNINITIALIZED_DWORD)
81168110 {
81178111 offset = getByteOffsetSrcRegion (oM);
81188112 }
81198113 }
81208114
8121- if ( canCoalesce )
8115+ if ( canCoalesce)
81228116 {
8123- if ( execSize == 16 && cntrls.oMPresent )
8117+ if ( execSize == 16 && cntrls.oMPresent )
81248118 {
81258119 // oM is 1 GRF for SIMD16 since it is UW type
8126- canCoalesce = checkIfRegionsAreConsecutive ( oM, R, execSize, Type_UW );
8120+ canCoalesce = checkIfRegionsAreConsecutive (oM, R, execSize, Type_UW);
81278121 prevRawOpnd = R;
81288122 }
81298123 else
81308124 {
8131- canCoalesce = checkIfRegionsAreConsecutive ( prevRawOpnd, R, execSize );
8125+ canCoalesce = checkIfRegionsAreConsecutive (prevRawOpnd, R, execSize);
81328126 prevRawOpnd = R;
81338127 }
81348128
8135- if ( offset == UNINITIALIZED_DWORD )
8129+ if ( offset == UNINITIALIZED_DWORD)
81368130 {
81378131 offset = getByteOffsetSrcRegion (prevRawOpnd);
81388132 }
81398133
8140- if ( canCoalesce )
8134+ if ( canCoalesce)
81418135 {
81428136 auto tempExecSize = execSize;
81438137 if (FP16Data && execSize == 8 )
@@ -8146,7 +8140,7 @@ int IR_Builder::translateVISARTWrite3DInst(
81468140 checkIfRegionsAreConsecutive (G, B, tempExecSize) &&
81478141 checkIfRegionsAreConsecutive (B, A, tempExecSize);
81488142 prevRawOpnd = A;
8149- if ( offset == UNINITIALIZED_DWORD )
8143+ if ( offset == UNINITIALIZED_DWORD)
81508144 {
81518145 offset = getByteOffsetSrcRegion (A);
81528146 if (FP16Data && execSize == 8 )
@@ -8155,15 +8149,15 @@ int IR_Builder::translateVISARTWrite3DInst(
81558149 }
81568150 }
81578151
8158- if ( canCoalesce && cntrls.zPresent )
8152+ if ( canCoalesce && cntrls.zPresent )
81598153 {
8160- canCoalesce = checkIfRegionsAreConsecutive ( prevRawOpnd, Z, execSize );
8154+ canCoalesce = checkIfRegionsAreConsecutive (prevRawOpnd, Z, execSize);
81618155 prevRawOpnd = Z;
81628156 }
81638157
8164- if ( canCoalesce && cntrls.isStencil )
8158+ if ( canCoalesce && cntrls.isStencil )
81658159 {
8166- canCoalesce = checkIfRegionsAreConsecutive ( prevRawOpnd, S, execSize );
8160+ canCoalesce = checkIfRegionsAreConsecutive (prevRawOpnd, S, execSize);
81678161 prevRawOpnd = S;
81688162 }
81698163
@@ -8299,7 +8293,7 @@ int IR_Builder::translateVISARTWrite3DInst(
82998293 if (useHeader)
83008294 {
83018295 m0 = Create_Src_Opnd_From_Dcl (msg, getRegionStride1 ());
8302- msgDesc = createSendMsgDesc (fc, 0 , RT_HEADER_SIZE , SFID::DP_WRITE, numRows,
8296+ msgDesc = createSendMsgDesc (fc, 0 , numHeaderGRF , SFID::DP_WRITE, numRows,
83038297 0 , SendAccess::WRITE_ONLY, surface);
83048298 msgDesc->setHeaderPresent (useHeader);
83058299 }
0 commit comments