Skip to content

Commit 31a0b24

Browse files
weiyu-chen authored and sys_zuul committed
Refactor pixel shader code gen to avoid hard-coding SIMD size and GRF size.
Change-Id: Ibc51a07dcf6c825c4e3e0fd68e69a921ef64c392
1 parent bd5ae01 commit 31a0b24

File tree

2 files changed

+67
-73
lines changed

2 files changed

+67
-73
lines changed

IGC/Compiler/CISACodeGen/PixelShaderCodeGen.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ namespace IGC
131131
uint offset = 0;
132132
//R0 is always allocated as a predefined variable. Increase offset for R0
133133
assert(m_R0);
134-
offset += getGRFSize();
134+
offset += 32; //R0 is always 32 byte regardless of register size
135135

136136
assert(m_R1);
137137
if (m_Signature)
@@ -141,7 +141,7 @@ namespace IGC
141141
for (uint i = 0; i < m_R1->GetNumberInstance(); i++)
142142
{
143143
AllocateInput(m_R1, offset, i);
144-
offset += getGRFSize();
144+
offset += (i == 0 && m_R1->GetNumberInstance() > 1) ? getGRFSize() : 32;
145145
}
146146

147147
for (uint i = 0; i < m_numberInstance; i++)
@@ -814,7 +814,7 @@ namespace IGC
814814
void CPixelShader::PreCompile()
815815
{
816816
CreateImplicitArgs();
817-
m_R1 = GetNewVariable((getGRFSize() >> 2), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
817+
m_R1 = GetNewVariable(8, ISA_TYPE_D, EALIGN_HWORD, false, m_numberInstance);
818818
CodeGenContext* ctx = GetContext();
819819

820820
// make sure the return block is properly set

visa/TranslationInterface.cpp

Lines changed: 64 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -7780,7 +7780,7 @@ int IR_Builder::translateVISARTWrite3DInst(
77807780
"R,G,B,A for RT write must have the same type");
77817781
}
77827782

7783-
auto mult = (execSize == 8)? 1 : 2;
7783+
auto mult = (execSize == getNativeExecSize() ? 1 : 2);
77847784
mult = (FP16Data)? 1 : mult;
77857785

77867786
//RGBA sr0Alpha take up one GRF in SIMD8 and SIMD16 modes.
@@ -7795,29 +7795,23 @@ int IR_Builder::translateVISARTWrite3DInst(
77957795
++numRows;
77967796
}
77977797

7798-
if( cntrls.oMPresent && mult == 2 )
7798+
if (cntrls.oMPresent && mult == 2)
77997799
{
78007800
// oM is always 1 row irrespective of execSize
78017801
numRows--;
78027802
}
78037803

78047804
//although for now HW only supports stencil in SIMD8 mode
7805-
if ( cntrls.isStencil && mult == 2 )
7805+
if (cntrls.isStencil && mult == 2)
78067806
{
78077807
// stencil is always 1 row irrespective of execSize
78087808
numRows--;
78097809
}
78107810

7811-
#define HEADER_OFFSET GENX_GRF_REG_SIZ * 2
7812-
#define HEADER_SIZE GENX_SAMPLER_IO_SZ * 2
7813-
#define RT_HEADER_SIZE 2
7814-
uint8_t headerSizeInDwords = HEADER_SIZE;
7815-
if (emask == vISA_EMASK_M5_NM || emask == vISA_EMASK_M5)
7816-
{
7817-
//For SIMD32 case when RT Write is split in to two SIMD16
7818-
//header information is expected in R0/R2 registers
7819-
headerSizeInDwords += GENX_SAMPLER_IO_SZ;
7820-
}
7811+
// header is always 64 byte
7812+
const int numDWInHeader = 16;
7813+
const int headerBytes = numDWInHeader * sizeof(int);
7814+
const int numHeaderGRF = numDWInHeader / getNativeExecSize();
78217815

78227816
/*
78237817
All other values should be set by default.
@@ -7830,7 +7824,7 @@ int IR_Builder::translateVISARTWrite3DInst(
78307824
if (needsHeaderForMRT || cntrls.isSampleIndex)
78317825
{
78327826
useHeader = true;
7833-
numRows += RT_HEADER_SIZE;
7827+
numRows += numHeaderGRF;
78347828
}
78357829

78367830
bool useSplitSend = useSends();
@@ -7850,10 +7844,10 @@ int IR_Builder::translateVISARTWrite3DInst(
78507844
if (useHeader)
78517845
{
78527846
//subtracting Header
7853-
numRows -= RT_HEADER_SIZE;
7847+
numRows -= numHeaderGRF;
78547848
//creating header
7855-
msg = createSendPayloadDcl(GENX_SAMPLER_IO_SZ * RT_HEADER_SIZE, Type_UD);
7856-
msgF = createSendPayloadDcl(GENX_SAMPLER_IO_SZ * RT_HEADER_SIZE, Type_F);
7849+
msg = createSendPayloadDcl(numDWInHeader, Type_UD);
7850+
msgF = createSendPayloadDcl(numDWInHeader, Type_F);
78577851
msgF->setAliasDeclare(msg, 0);
78587852
}
78597853
//creating payload
@@ -7876,19 +7870,19 @@ int IR_Builder::translateVISARTWrite3DInst(
78767870
msgF->setAliasDeclare(msg, 0);
78777871

78787872
//creating payload declarations.
7879-
payloadUD = createSendPayloadDcl(numElts - (useHeader ? HEADER_SIZE : 0), Type_UD);
7880-
payloadFOrHF = createSendPayloadDcl(numElts - (useHeader ? HEADER_SIZE : 0), FP16Data ? Type_HF : Type_F);
7881-
payloadUW = createSendPayloadDcl(numElts - (useHeader ? HEADER_SIZE : 0), Type_UW);
7873+
payloadUD = createSendPayloadDcl(numElts - (useHeader ? numDWInHeader : 0), Type_UD);
7874+
payloadFOrHF = createSendPayloadDcl(numElts - (useHeader ? numDWInHeader : 0), FP16Data ? Type_HF : Type_F);
7875+
payloadUW = createSendPayloadDcl(numElts - (useHeader ? numDWInHeader : 0), Type_UW);
78827876
payloadF = createSendPayloadDcl(numElts, Type_F);
78837877

78847878
//setting them to alias a top level decl with offset past the header
7885-
payloadUD->setAliasDeclare(msg, useHeader ? HEADER_OFFSET : 0);
7886-
payloadFOrHF->setAliasDeclare(msg, useHeader ? HEADER_OFFSET : 0);
7887-
payloadUW->setAliasDeclare(msg, useHeader ? HEADER_OFFSET : 0);
7879+
payloadUD->setAliasDeclare(msg, useHeader ? headerBytes : 0);
7880+
payloadFOrHF->setAliasDeclare(msg, useHeader ? headerBytes : 0);
7881+
payloadUW->setAliasDeclare(msg, useHeader ? headerBytes : 0);
78887882
payloadF->setAliasDeclare(payloadUD, 0);
78897883
}
78907884

7891-
if( useHeader )
7885+
if (useHeader)
78927886
{
78937887
ASSERT_USER(r1HeaderOpnd, "Second GRF for header that was passed in is NULL.");
78947888
G4_DstRegRegion* payloadRegRgn = createDst(msg->getRegVar(), 0, 0, 1, Type_UD);
@@ -7908,14 +7902,14 @@ int IR_Builder::translateVISARTWrite3DInst(
79087902
#define SAMPLE_INDEX_OFFSET 6
79097903
if (cntrls.isSampleIndex)
79107904
{
7911-
G4_Declare *tmpDcl = createTempVar(2, Type_UD, Any);
7912-
G4_DstRegRegion *tmpDst = createDst(tmpDcl->getRegVar(), 0, 0, 1, Type_UD);
7905+
G4_Declare* tmpDcl = createTempVar(2, Type_UD, Any);
7906+
G4_DstRegRegion* tmpDst = createDst(tmpDcl->getRegVar(), 0, 0, 1, Type_UD);
79137907

79147908
createBinOp(G4_shl, 1, tmpDst, sampleIndexOpnd, createImm(SAMPLE_INDEX_OFFSET, Type_UD), InstOpt_WriteEnable, true);
79157909

79167910
G4_DstRegRegion* payloadUDRegRgn = createDst(msg->getRegVar(), 0, 0, 1, Type_UD);
7917-
G4_SrcRegRegion *tmpSrc = createSrcRegRegion(Mod_src_undef, Direct, tmpDcl->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
7918-
G4_SrcRegRegion *payloadSrc = createSrcRegRegion(Mod_src_undef, Direct, msg->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
7911+
G4_SrcRegRegion* tmpSrc = createSrcRegRegion(Mod_src_undef, Direct, tmpDcl->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
7912+
G4_SrcRegRegion* payloadSrc = createSrcRegRegion(Mod_src_undef, Direct, msg->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
79197913
createBinOp(G4_or, 1, payloadUDRegRgn, payloadSrc, tmpSrc, InstOpt_WriteEnable, true);
79207914
}
79217915

@@ -7924,12 +7918,12 @@ int IR_Builder::translateVISARTWrite3DInst(
79247918
G4_DstRegRegion* dstRTIRgn = createDst(msg->getRegVar(), 0, 2, 1, Type_UD);
79257919

79267920
G4_INST* rtiMovInst = createMov(1, dstRTIRgn, rtIndex, InstOpt_NoOpt, true);
7927-
rtiMovInst->setOptionOn( InstOpt_WriteEnable );
7921+
rtiMovInst->setOptionOn(InstOpt_WriteEnable);
79287922
}
79297923

79307924
//if header is used, then predication value will need to be stored
79317925
//in the header
7932-
if(useHeader && (pred || cntrls.isHeaderMaskfromCe0))
7926+
if (useHeader && (pred || cntrls.isHeaderMaskfromCe0))
79337927
{
79347928
//moving pixelMask in to payload
79357929
G4_DstRegRegion* dstPixelMaskRgn = createDst(
@@ -7952,20 +7946,20 @@ int IR_Builder::translateVISARTWrite3DInst(
79527946
// M0 : WAce0.0; M16 : WAce0.1
79537947
createMov(1, flag, createImm(0, Type_UW), InstOpt_WriteEnable, true);
79547948

7955-
G4_SrcRegRegion *r0_0 = createSrcRegRegion(
7949+
G4_SrcRegRegion* r0_0 = createSrcRegRegion(
79567950
Mod_src_undef, Direct,
79577951
getRealR0()->getRegVar(), 0, 0,
79587952
getRegionStride1(), Type_UW);
7959-
G4_SrcRegRegion *r0_1 = createSrcRegRegion(
7953+
G4_SrcRegRegion* r0_1 = createSrcRegRegion(
79607954
Mod_src_undef, Direct,
79617955
getRealR0()->getRegVar(), 0, 0,
79627956
getRegionStride1(), Type_UW);
7963-
G4_DstRegRegion *nullDst = createNullDst(Type_UW);
7957+
G4_DstRegRegion* nullDst = createNullDst(Type_UW);
79647958
G4_CondMod* flagCM = createCondMod(Mod_e, flagVar, 0);
79657959
createInst(NULL, G4_cmp, flagCM, false, 16, nullDst,
79667960
r0_0, r0_1, Option);
79677961

7968-
G4_SrcRegRegion *flagSrc = createSrcRegRegion(
7962+
G4_SrcRegRegion* flagSrc = createSrcRegRegion(
79697963
Mod_src_undef, Direct,
79707964
flagVar, 0,
79717965
Option == InstOpt_M16 ? 1 : 0,
@@ -7975,20 +7969,20 @@ int IR_Builder::translateVISARTWrite3DInst(
79757969
createMov(1, dstPixelMaskRgn, flagSrc, InstOpt_WriteEnable, true);
79767970
};
79777971

7978-
G4_SrcRegRegion *pixelMask = NULL;
7972+
G4_SrcRegRegion* pixelMask = NULL;
79797973
if (emask == vISA_EMASK_M5_NM || emask == vISA_EMASK_M5)
79807974
{
79817975
if (pred)
79827976
{
79837977
//this is a Second half of a SIMD32 RT write. We need to get second half of flag register.
79847978
//mov whole register in to GRF, move second word of it in to payload.
79857979

7986-
G4_SrcRegRegion *pixelMaskTmp = createSrcRegRegion(
7987-
Mod_src_undef, Direct,
7988-
pred->getBase()->asRegVar(), 0, 0,
7989-
getRegionScalar(), Type_UD);
7990-
G4_Declare *tmpDcl = createTempVar(1, Type_UD, Any);
7991-
G4_DstRegRegion *tmpDst = createDst(tmpDcl->getRegVar(), 0, 0, 1, Type_UD);
7980+
G4_SrcRegRegion* pixelMaskTmp = createSrcRegRegion(
7981+
Mod_src_undef, Direct,
7982+
pred->getBase()->asRegVar(), 0, 0,
7983+
getRegionScalar(), Type_UD);
7984+
G4_Declare* tmpDcl = createTempVar(1, Type_UD, Any);
7985+
G4_DstRegRegion* tmpDst = createDst(tmpDcl->getRegVar(), 0, 0, 1, Type_UD);
79927986
createMov(1, tmpDst, pixelMaskTmp, InstOpt_WriteEnable, true);
79937987

79947988
pixelMask = createSrcRegRegion(Mod_src_undef, Direct,
@@ -8005,7 +7999,7 @@ int IR_Builder::translateVISARTWrite3DInst(
80057999
}
80068000
else
80078001
{
8008-
G4_SrcRegRegion *ce0 = createSrcRegRegion(
8002+
G4_SrcRegRegion* ce0 = createSrcRegRegion(
80098003
Mod_src_undef, Direct,
80108004
phyregpool.getMask0Reg(), 0, 0,
80118005
getRegionScalar(), Type_UD);
@@ -8035,7 +8029,7 @@ int IR_Builder::translateVISARTWrite3DInst(
80358029
}
80368030
else
80378031
{
8038-
G4_SrcRegRegion *ce0 = createSrcRegRegion(
8032+
G4_SrcRegRegion* ce0 = createSrcRegRegion(
80398033
Mod_src_undef, Direct,
80408034
phyregpool.getMask0Reg(), 0, 0,
80418035
getRegionScalar(), Type_UD);
@@ -8053,34 +8047,34 @@ int IR_Builder::translateVISARTWrite3DInst(
80538047

80548048
//setting first DWORD of MHC_RT_C0 - Render Target Message Header Control
80558049

8056-
if( cntrls.isStencil )
8050+
if (cntrls.isStencil)
80578051
{
8058-
orImmVal = ( 0x1 << 14 );
8052+
orImmVal = (0x1 << 14);
80598053
}
80608054

8061-
if( cntrls.zPresent )
8055+
if (cntrls.zPresent)
80628056
{
8063-
orImmVal = ( 0x1 << 13 );
8057+
orImmVal = (0x1 << 13);
80648058
}
80658059

8066-
if( cntrls.oMPresent )
8060+
if (cntrls.oMPresent)
80678061
{
8068-
orImmVal |= ( 0x1 << 12 );
8062+
orImmVal |= (0x1 << 12);
80698063
}
80708064

8071-
if( cntrls.s0aPresent )
8065+
if (cntrls.s0aPresent)
80728066
{
8073-
orImmVal |= ( 0x1 << 11 );
8067+
orImmVal |= (0x1 << 11);
80748068
}
80758069

8076-
if( orImmVal != 0 )
8070+
if (orImmVal != 0)
80778071
{
80788072
G4_SrcRegRegion* immSrcRegRgn = createSrcRegRegion(Mod_src_undef, Direct, msg->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
80798073

80808074
G4_DstRegRegion* immDstRegRgn = createDst(msg->getRegVar(), 0, 0, 1, Type_UD);
80818075

8082-
G4_INST* immOrInst = createBinOp(G4_or, 1, immDstRegRgn, immSrcRegRgn, createImm( orImmVal, Type_UD ), InstOpt_WriteEnable, true );
8083-
immOrInst->setOptionOn( InstOpt_WriteEnable );
8076+
G4_INST* immOrInst = createBinOp(G4_or, 1, immDstRegRgn, immSrcRegRgn, createImm(orImmVal, Type_UD), InstOpt_WriteEnable, true);
8077+
immOrInst->setOptionOn(InstOpt_WriteEnable);
80848078
}
80858079
}
80868080

@@ -8100,44 +8094,44 @@ int IR_Builder::translateVISARTWrite3DInst(
81008094
A->isNullReg())
81018095
canCoalesce = false;
81028096

8103-
if( canCoalesce && cntrls.s0aPresent)
8097+
if (canCoalesce && cntrls.s0aPresent)
81048098
{
81058099
prevRawOpnd = s0a;
81068100
offset = getByteOffsetSrcRegion(s0a);
81078101
}
81088102

8109-
if( canCoalesce && cntrls.oMPresent )
8103+
if (canCoalesce && cntrls.oMPresent)
81108104
{
81118105
//by default it will check based on first opnd type, but that can be HF, F, we need second operand type
81128106
//according to spec oM is UW
8113-
canCoalesce = checkIfRegionsAreConsecutive( prevRawOpnd, oM, execSize, oM->getType() );
8107+
canCoalesce = checkIfRegionsAreConsecutive(prevRawOpnd, oM, execSize, oM->getType());
81148108
prevRawOpnd = oM;
8115-
if( offset == UNINITIALIZED_DWORD )
8109+
if (offset == UNINITIALIZED_DWORD)
81168110
{
81178111
offset = getByteOffsetSrcRegion(oM);
81188112
}
81198113
}
81208114

8121-
if( canCoalesce )
8115+
if (canCoalesce)
81228116
{
8123-
if( execSize == 16 && cntrls.oMPresent )
8117+
if (execSize == 16 && cntrls.oMPresent)
81248118
{
81258119
// oM is 1 GRF for SIMD16 since it is UW type
8126-
canCoalesce = checkIfRegionsAreConsecutive( oM, R, execSize, Type_UW );
8120+
canCoalesce = checkIfRegionsAreConsecutive(oM, R, execSize, Type_UW);
81278121
prevRawOpnd = R;
81288122
}
81298123
else
81308124
{
8131-
canCoalesce = checkIfRegionsAreConsecutive( prevRawOpnd, R, execSize );
8125+
canCoalesce = checkIfRegionsAreConsecutive(prevRawOpnd, R, execSize);
81328126
prevRawOpnd = R;
81338127
}
81348128

8135-
if( offset == UNINITIALIZED_DWORD )
8129+
if (offset == UNINITIALIZED_DWORD)
81368130
{
81378131
offset = getByteOffsetSrcRegion(prevRawOpnd);
81388132
}
81398133

8140-
if( canCoalesce )
8134+
if (canCoalesce)
81418135
{
81428136
auto tempExecSize = execSize;
81438137
if (FP16Data && execSize == 8)
@@ -8146,7 +8140,7 @@ int IR_Builder::translateVISARTWrite3DInst(
81468140
checkIfRegionsAreConsecutive(G, B, tempExecSize) &&
81478141
checkIfRegionsAreConsecutive(B, A, tempExecSize);
81488142
prevRawOpnd = A;
8149-
if( offset == UNINITIALIZED_DWORD )
8143+
if (offset == UNINITIALIZED_DWORD)
81508144
{
81518145
offset = getByteOffsetSrcRegion(A);
81528146
if (FP16Data && execSize == 8)
@@ -8155,15 +8149,15 @@ int IR_Builder::translateVISARTWrite3DInst(
81558149
}
81568150
}
81578151

8158-
if( canCoalesce && cntrls.zPresent )
8152+
if (canCoalesce && cntrls.zPresent)
81598153
{
8160-
canCoalesce = checkIfRegionsAreConsecutive( prevRawOpnd, Z, execSize );
8154+
canCoalesce = checkIfRegionsAreConsecutive(prevRawOpnd, Z, execSize);
81618155
prevRawOpnd = Z;
81628156
}
81638157

8164-
if( canCoalesce && cntrls.isStencil )
8158+
if (canCoalesce && cntrls.isStencil)
81658159
{
8166-
canCoalesce = checkIfRegionsAreConsecutive( prevRawOpnd, S, execSize );
8160+
canCoalesce = checkIfRegionsAreConsecutive(prevRawOpnd, S, execSize);
81678161
prevRawOpnd = S;
81688162
}
81698163

@@ -8299,7 +8293,7 @@ int IR_Builder::translateVISARTWrite3DInst(
82998293
if (useHeader)
83008294
{
83018295
m0 = Create_Src_Opnd_From_Dcl(msg, getRegionStride1());
8302-
msgDesc = createSendMsgDesc(fc, 0, RT_HEADER_SIZE, SFID::DP_WRITE, numRows,
8296+
msgDesc = createSendMsgDesc(fc, 0, numHeaderGRF, SFID::DP_WRITE, numRows,
83038297
0, SendAccess::WRITE_ONLY, surface);
83048298
msgDesc->setHeaderPresent(useHeader);
83058299
}

0 commit comments

Comments
 (0)