@@ -5363,15 +5363,15 @@ void HWConformity::avoidInstDstSrcOverlap(INST_LIST_ITER it, G4_BB *bb,
5363
5363
return ;
5364
5364
}
5365
5365
5366
+ unsigned grfSize = builder.getGRFSize ();
5366
5367
G4_Declare *dstDcl = dst->getTopDcl ();
5367
5368
if (dstDcl) {
5368
5369
G4_DstRegRegion *dstRgn = dst;
5369
5370
bool dstCrossGRF =
5370
- (dstRgn->getSubRegOff () * dstRgn->getTypeSize () +
5371
- (dstRgn->getLinearizedEnd () - dstRgn->getLinearizedStart ()) + 1 ) >
5372
- kernel.numEltPerGRF <Type_UB>();
5373
- int dstFirstHalf =
5374
- dst->getLinearizedStart () / kernel.numEltPerGRF <Type_UB>();
5371
+ ((dstRgn->getSubRegOff () * dstRgn->getTypeSize ()) % grfSize +
5372
+ (dstRgn->getLinearizedEnd () - dstRgn->getLinearizedStart ()) + 1 ) >
5373
+ grfSize;
5374
+ int dstFirstHalf = dst->getLinearizedStart () / grfSize;
5375
5375
5376
5376
bool srcOverlap = false ;
5377
5377
for (int i = 0 , nSrcs = inst->getNumSrc (); i < nSrcs; i++) {
@@ -5387,9 +5387,9 @@ void HWConformity::avoidInstDstSrcOverlap(INST_LIST_ITER it, G4_BB *bb,
5387
5387
if (srcDcl == dstDcl && srcRgn->getRegAccess () == Direct &&
5388
5388
srcRgn->getBase ()->isRegVar ()) {
5389
5389
bool srcCrossGRF =
5390
- (srcRgn->getSubRegOff () * srcRgn->getTypeSize () +
5390
+ (( srcRgn->getSubRegOff () * srcRgn->getTypeSize ()) % grfSize +
5391
5391
(srcRgn->getLinearizedEnd () - srcRgn->getLinearizedStart ()) +
5392
- 1 ) > kernel. numEltPerGRF <Type_UB>() ;
5392
+ 1 ) > grfSize ;
5393
5393
// The half define in region rule "second half of a source operand
5394
5394
// must not point to the same register as the first half of
5395
5395
// destination operand in a compressed instruction" is exactly size
@@ -5404,7 +5404,7 @@ void HWConformity::avoidInstDstSrcOverlap(INST_LIST_ITER it, G4_BB *bb,
5404
5404
((srcRgn->getLinearizedEnd () -
5405
5405
srcRgn->getLinearizedStart () + 1 ) /
5406
5406
2 )) /
5407
- kernel. numEltPerGRF <Type_UB>() ;
5407
+ grfSize ;
5408
5408
} else { // For non-contiguous region, there are holes in the region,
5409
5409
// the start of second half elements needs to be calculated in
5410
5410
// stride and element sizes at the same time.
@@ -5427,15 +5427,14 @@ void HWConformity::avoidInstDstSrcOverlap(INST_LIST_ITER it, G4_BB *bb,
5427
5427
numElePerRow = rowSize / execTypeSize,
5428
5428
numExecEmePerRow =
5429
5429
regionDesc->horzStride == 0 ? 1 : regionDesc->width ;
5430
- uint16_t totalNumEle =
5431
- (regionDesc-> vertStride >= numElePerRow )
5432
- ? (numRows * numExecEmePerRow)
5433
- : (srcRgn-> getRightBound () - srcRgn->getLeftBound () + 1 ) /
5434
- execTypeSize;
5430
+ uint16_t totalNumEle = (regionDesc-> vertStride >= numElePerRow)
5431
+ ? (numRows * numExecEmePerRow )
5432
+ : (srcRgn-> getLinearizedEnd () -
5433
+ srcRgn->getLinearizedStart () + 1 ) /
5434
+ execTypeSize;
5435
5435
srcSecondHalf =
5436
- (srcRgn->getLeftBound () % builder.numEltPerGRF <Type_UB>() +
5437
- (totalNumEle / 2 ) * vertSize) /
5438
- builder.numEltPerGRF <Type_UB>();
5436
+ (srcRgn->getLinearizedStart () + (totalNumEle / 2 ) * vertSize) /
5437
+ grfSize;
5439
5438
}
5440
5439
5441
5440
if (dstCrossGRF || srcCrossGRF) {
0 commit comments