@@ -4723,22 +4723,18 @@ void G4_DstRegRegion::setDstBitVec( uint8_t exec_size )
47234723 unsigned short s_size = horzStride * type_size;
47244724
47254725 // General cases.
4726- unsigned short bit_seq = G4_Type_Table[type].footprint ;
4726+ uint64_t bit_seq = G4_Type_Table[type].footprint ;
47274727 for (uint8_t i = 0 ; i < exec_size; ++i)
47284728 {
47294729 int eltOffset = i * s_size;
4730- if (eltOffset >= getGRFSize ())
4730+ // no element can cross 64-byte boundary
4731+ if (eltOffset >= 64 )
47314732 {
4732- footprint1 |= (( uint64_t ) bit_seq) << (eltOffset - getGRFSize () );
4733+ footprint1 |= bit_seq << (eltOffset - 64 );
47334734 }
4734- else if (eltOffset + G4_Type_Table[type]. byteSize < getGRFSize ())
4735+ else
47354736 {
4736- footprint0 |= ((uint64_t )bit_seq) << eltOffset;
4737- }
4738- else
4739- {
4740- footprint0 |= ((uint64_t )bit_seq) << eltOffset; // 4 + 31 --> 1 BIT MASKED
4741- footprint1 |= ((uint64_t )bit_seq) >> (getGRFSize () - eltOffset); // 32 - 31 = 1, keep 3
4737+ footprint0 |= bit_seq << eltOffset;
47424738 }
47434739 }
47444740
@@ -4945,67 +4941,33 @@ static G4_CmpRelation compareRegRegionToOperand(G4_Operand* regRegion, G4_Operan
49454941 return Rel_interfere;
49464942 }
49474943
4948- // Now both operands are within two GRFs, comparing their L/H vectors
4949- // to get more precise relations
4950- int dist = left_bound2 - myLeftBound;
4951- uint64_t new_bitVecL = myBitVecL, new_bitVecH = myBitVecH;
4952- if (dist > 0 && dist < (2 * GENX_GRF_REG_SIZ))
4953- {
4954- if (dist >= GENX_GRF_REG_SIZ)
4955- {
4956- uint64_t lbit = new_bitVecH >> (dist - GENX_GRF_REG_SIZ);
4957- new_bitVecL = lbit;
4958- new_bitVecH = 0 ;
4959- }
4960- else
4961- {
4962- new_bitVecL >>= dist;
4963- uint64_t lbit = new_bitVecH << (GENX_GRF_REG_SIZ - dist);
4964- new_bitVecL |= lbit;
4965- new_bitVecH >>= dist;
4966- }
4967- }
4968- else if (dist < 0 && dist > (-2 * GENX_GRF_REG_SIZ))
4944+ // Now both operands are within two GRFs, compare their footprint to get precise relations
4945+ int maskSize = 2 * getGRFSize ();
4946+ if (myDcl)
49694947 {
4970- dist = abs (dist);
4971- if (dist >= GENX_GRF_REG_SIZ)
4972- {
4973- uint64_t lbit = opndBitVecH >> (dist - GENX_GRF_REG_SIZ);
4974- opndBitVecL = lbit;
4975- opndBitVecH = 0 ;
4976- }
4977- else
4978- {
4979- opndBitVecL >>= dist;
4980- uint64_t lbit = opndBitVecH << (GENX_GRF_REG_SIZ - dist);
4981- opndBitVecL |= lbit;
4982- opndBitVecH >>= dist;
4983- }
4948+ maskSize = myDcl->getRegVar ()->isFlag () ? myDcl->getNumberFlagElements ()
4949+ : myDcl->getByteSize ();
49844950 }
4985- uint64_t commonL = new_bitVecL & opndBitVecL, commonH = new_bitVecH & opndBitVecH;
4951+ BitSet myBitSet (maskSize, false );
4952+ BitSet otherBitSet (maskSize, false );
4953+ regRegion->updateFootPrint (myBitSet, true );
4954+ opnd->updateFootPrint (otherBitSet, true );
49864955
4987- if (myLeftBound <= left_bound2 &&
4988- myRightBound >= right_bound2 &&
4989- commonL == opndBitVecL &&
4990- commonH == opndBitVecH)
4991- {
4992- return Rel_gt;
4993- }
4994- else if (myLeftBound >= left_bound2 &&
4995- myRightBound <= right_bound2 &&
4996- commonL == new_bitVecL &&
4997- commonH == new_bitVecH)
4998- {
4999- return Rel_lt;
5000- }
5001- else if (dist < (2 *GENX_GRF_REG_SIZ) && dist > (-2 * GENX_GRF_REG_SIZ) && commonL == 0 && commonH == 0 )
4956+ BitSet tmp = myBitSet;
4957+ myBitSet &= otherBitSet;
4958+ if (myBitSet.isEmpty ())
50024959 {
50034960 return Rel_disjoint;
50044961 }
5005- else
4962+
4963+ myBitSet = tmp;
4964+ myBitSet -= otherBitSet;
4965+ if (myBitSet.isEmpty ())
50064966 {
5007- return Rel_interfere ;
4967+ return Rel_lt ;
50084968 }
4969+ otherBitSet -= tmp;
4970+ return otherBitSet.isEmpty () ? Rel_gt : Rel_interfere;
50094971 }
50104972 }
50114973}
@@ -5974,8 +5936,8 @@ void G4_Predicate::splitPred( )
59745936
59755937 bitVec[0 ] = ((uint32_t )getBitVecL ()) >> shiftLen;
59765938}
5977- void
5978- G4_CondMod::emit (std::ostream& output, bool symbolreg)
5939+
5940+ void G4_CondMod::emit (std::ostream& output, bool symbolreg)
59795941{
59805942 output << ' .' << CondModStr[mod];
59815943 output << ' .' ;
@@ -6191,53 +6153,6 @@ G4_Imm::emitAutoFmt(std::ostream& output)
61916153 }
61926154}
61936155
6194- int64_t G4_Imm::typecastVals (int64_t value, G4_Type type)
6195- {
6196- int64_t retVal = 0 ;
6197- switch (type)
6198- {
6199- case Type_UD:
6200- case Type_UV:
6201- case Type_VF:
6202- {
6203- retVal = (int64_t )((unsigned int )value);
6204- break ;
6205- }
6206- case Type_D:
6207- case Type_V:
6208- {
6209- retVal = (int64_t )((int )value);
6210- break ;
6211- }
6212- case Type_UW:
6213- {
6214- retVal = (int64_t )((uint16_t )value);
6215- break ;
6216- }
6217- case Type_W:
6218- {
6219- retVal = (int64_t )((int16_t )value);
6220- break ;
6221- }
6222- case Type_UB:
6223- {
6224- retVal = (int64_t )((uint8_t )value);
6225- break ;
6226- }
6227- case Type_B:
6228- {
6229- retVal = (int64_t )((int8_t )value);
6230- break ;
6231- }
6232- default :
6233- {
6234- // Dont do float conversions
6235- retVal = value;
6236- }
6237- }
6238- return retVal;
6239- }
6240-
62416156G4_RegVar *
62426157G4_RegVarTransient::getNonTransientBaseRegVar ()
62436158{
@@ -6411,7 +6326,7 @@ void G4_SrcRegRegion::computeLeftBound()
64116326
64126327void G4_SrcRegRegion::setSrcBitVec (uint8_t exec_size)
64136328{
6414- unsigned short bit_seq = G4_Type_Table[type].footprint ;
6329+ uint64_t bit_seq = G4_Type_Table[type].footprint ;
64156330 unsigned short typeSize = (unsigned short )G4_Type_Table[type].byteSize ;
64166331
64176332 uint64_t footPrint0 = 0 ;
@@ -6424,24 +6339,14 @@ void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
64246339 }
64256340 else if (desc->isContiguous (exec_size))
64266341 {
6342+ // fast path
64276343 int totalBytes = exec_size * typeSize;
6428- MUST_BE_TRUE (totalBytes <= 2 * getGRFSize (), " total bits exceeds 2 GRFs" );
6429- if (totalBytes == getGRFSize () * 2 )
6430- {
6431- footPrint0 = ULLONG_MAX;
6432- footPrint1 = ULLONG_MAX;
6433- }
6434- else
6435- {
6436- if (totalBytes <= getGRFSize ())
6437- {
6438- footPrint0 = (1ULL << totalBytes) - 1 ;
6439- }
6440- else
6441- {
6442- footPrint0 = ULLONG_MAX;
6443- footPrint1 = (1ULL << (totalBytes - getGRFSize ())) - 1 ;
6444- }
6344+ MUST_BE_TRUE (totalBytes <= 2 * getGRFSize (), " total bytes exceed 2 GRFs" );
6345+
6346+ footPrint0 = totalBytes < 64 ? (1ULL << totalBytes) - 1 : ULLONG_MAX;
6347+ if (totalBytes > 64 )
6348+ {
6349+ footPrint1 = totalBytes == 128 ? ULLONG_MAX : (1ULL << (totalBytes - 64 )) - 1 ;
64456350 }
64466351 }
64476352 else
@@ -6451,28 +6356,21 @@ void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
64516356 for (int j = 0 ; j < desc->width ; ++j)
64526357 {
64536358 int eltOffset = i * desc->vertStride * typeSize + j * desc->horzStride * typeSize;
6454-
6455- if (eltOffset >= getGRFSize ())
6456- {
6457- footPrint1 |= ((uint64_t )bit_seq) << (eltOffset - getGRFSize ());
6458- }
6459- else if (eltOffset + G4_Type_Table[type].byteSize < getGRFSize ())
6359+ // no element can cross 64-byte boundary
6360+ if (eltOffset >= 64 )
64606361 {
6461- footPrint0 |= (( uint64_t ) bit_seq) << eltOffset;
6362+ footPrint1 |= bit_seq << ( eltOffset - 64 ) ;
64626363 }
64636364 else
64646365 {
6465- footPrint0 |= ((uint64_t )bit_seq) << eltOffset;
6466- footPrint1 |= ((uint64_t )bit_seq) >> (getGRFSize () - eltOffset);
6366+ footPrint0 |= bit_seq << eltOffset;
64676367 }
64686368 }
64696369 }
64706370 }
64716371
64726372 bitVec[0 ] = footPrint0;
64736373 bitVec[1 ] = footPrint1;
6474-
6475- return ;
64766374}
64776375
64786376unsigned G4_SrcRegRegion::computeRightBound ( uint8_t exec_size )
@@ -6905,6 +6803,76 @@ void G4_INST::computeLeftBoundForImplAcc(G4_Operand* opnd)
69056803 }
69066804}
69076805
6806+ //
6807+ // Normalize an operand's bitvec footprint based on its left bound
6808+ // and update the given bitset.
6809+ // If isSet is true, we set all bits that are covered by this operand.
6810+ // If isSet os false, we clear all bits that are covered by this operand.
6811+ //
6812+ void G4_Operand::updateFootPrint (BitSet& footprint, bool isSet)
6813+ {
6814+ unsigned N = NUM_BITS_PER_ELT;
6815+ unsigned lb = getLeftBound ();
6816+ unsigned rb = getRightBound ();
6817+ const bool doFastPath = true ; // for debugging
6818+
6819+ if (doFastPath && lb % N == 0 && (rb + 1 ) % N == 0 )
6820+ {
6821+ // lb is 32-byte aligned, set one dword at a time
6822+ unsigned idx = lb / N;
6823+ unsigned endIdx = rb / N;
6824+ // get the precise footprint for the first two GRF
6825+ for (int i = 0 ; i < 2 && idx <= endIdx; ++i, ++idx)
6826+ {
6827+ uint64_t bits = getBitVecL ();
6828+ uint32_t bitVal = (uint32_t )(i % 2 ? bits >> N : bits);
6829+ if (isSet)
6830+ {
6831+ footprint.setElt (idx, bitVal);
6832+ }
6833+ else
6834+ {
6835+ footprint.resetElt (idx, bitVal);
6836+ }
6837+ }
6838+
6839+ // beyond the first two GRF we assume every byte is touched
6840+ while (idx <= endIdx)
6841+ {
6842+ if (isSet)
6843+ {
6844+ footprint.setElt (idx, 0xFFFFFFFF );
6845+ }
6846+ else
6847+ {
6848+ footprint.resetElt (idx, 0xFFFFFFFF );
6849+ }
6850+ idx++;
6851+ }
6852+ }
6853+ else
6854+ {
6855+ // handle unaligned case
6856+ uint64_t mask0 = getBitVecL ();
6857+ unsigned j = lb;
6858+ for (unsigned i = 0 ; i < 64 && j <= rb; ++i, ++j)
6859+ {
6860+ if (mask0 & (1ULL << i))
6861+ footprint.set (j, isSet);
6862+ }
6863+ while (j++ <= rb)
6864+ footprint.set (j, isSet);
6865+ }
6866+ }
6867+
6868+ // update bit vector for this operand based on it size
6869+ // We assume all bytes are touched
6870+ void G4_Operand::setBitVecFromSize (uint32_t NBytes)
6871+ {
6872+ bitVec[0 ] = NBytes < 64 ? (1ULL << NBytes) - 1 : ULLONG_MAX;
6873+ bitVec[1 ] = 0 ;
6874+ }
6875+
69086876// Left and right bound for every operand is based off
69096877// top most dcl.
69106878// For flag register as dst/src/pred/cond mod, each bit of
@@ -7019,23 +6987,7 @@ void G4_InstSend::computeRightBound(G4_Operand* opnd)
70196987 LB + numReg * G4_GRF_REG_NBYTES) - 1 ;
70206988
70216989 unsigned NBytes = RB - LB + 1 ;
7022- if (NBytes <= 32 )
7023- {
7024- uint32_t Mask = uint32_t (-1 ) >> (32 - NBytes);
7025- opnd->setBitVecL (Mask);
7026- }
7027- else if (NBytes <= 64 )
7028- {
7029- opnd->setBitVecL (0xFFFFFFFF );
7030- uint32_t Mask = uint32_t (-1 ) >> (64 - NBytes);
7031- opnd->setBitVecH (Mask);
7032- }
7033- else
7034- {
7035- // NBytes > 64
7036- opnd->setBitVecL (0xFFFFFFFF );
7037- opnd->setBitVecH (0xFFFFFFFF );
7038- }
6990+ opnd->setBitVecFromSize (NBytes);
70396991 opnd->setRightBound (RB);
70406992 };
70416993
0 commit comments