@@ -1948,177 +1948,223 @@ void Interference::markInterferenceToAvoidDstSrcOverlap(G4_BB *bb,
19481948 bool isDstLocallyAssigned = false ;
19491949 unsigned dstId = 0 ;
19501950 int dstPreg = 0 , dstNumRows = 0 ;
1951- bool dstOpndNumRows = false ;
1951+ bool dstMoreThan1GRF = false ;
1952+ bool dstMoreThan2GRF = false ;
19521953
19531954 G4_DstRegRegion *dst = inst->getDst ();
1954- if (dst->getBase ()->isRegVar () &&
1955- (dst->getTopDcl ()->getRegFile () == G4_GRF)) {
1956- G4_Declare *dstDcl = dst->getTopDcl ();
1957- int dstOffset = dst->getLeftBound () / kernel.numEltPerGRF <Type_UB>();
1958- bool isDstEvenAlign = gra.isEvenAligned (dstDcl);
1955+ if (!dst->getBase ()->isRegVar () || dst->getTopDcl ()->getRegFile () != G4_GRF) {
1956+ return ;
1957+ }
19591958
1960- if (dst->getBase ()->isRegAllocPartaker ()) {
1961- isDstRegAllocPartaker = true ;
1962- dstId = ((G4_RegVar *)dst->getBase ())->getId ();
1963- dstOpndNumRows = dst->getSubRegOff () * dst->getTypeSize () +
1964- dst->getLinearizedEnd () - dst->getLinearizedStart () +
1965- 1 >
1966- kernel.numEltPerGRF <Type_UB>();
1967- } else if (gra.useLocalRA ) {
1968- LocalLiveRange *localLR = NULL ;
1969- G4_Declare *topdcl = GetTopDclFromRegRegion (dst);
1959+ G4_Declare *dstDcl = dst->getTopDcl ();
1960+ int dstOffset = dst->getLeftBound () / kernel.numEltPerGRF <Type_UB>();
1961+ bool isDstEvenAlign = gra.isEvenAligned (dstDcl);
1962+ bool isDstQuadAlign = gra.isQuadAligned (dstDcl);
19701963
1971- if (topdcl)
1972- localLR = gra.getLocalLR (topdcl);
1973- if (localLR && localLR->getAssigned ()) {
1974- int sreg;
1975- G4_VarBase *preg = localLR->getPhyReg (sreg);
1964+ if (dst->getBase ()->isRegAllocPartaker ()) {
1965+ isDstRegAllocPartaker = true ;
1966+ dstId = ((G4_RegVar *)dst->getBase ())->getId ();
1967+ unsigned dstGRFByteSizeWithOffset =
1968+ dst->getSubRegOff () * dst->getTypeSize () + dst->getLinearizedEnd () -
1969+ dst->getLinearizedStart () + 1 ;
1970+ dstMoreThan1GRF = dstGRFByteSizeWithOffset > kernel.numEltPerGRF <Type_UB>();
1971+ dstMoreThan2GRF =
1972+ dstGRFByteSizeWithOffset > (2 * kernel.numEltPerGRF <Type_UB>());
1973+ } else if (gra.useLocalRA ) {
1974+ LocalLiveRange *localLR = NULL ;
1975+ G4_Declare *topdcl = GetTopDclFromRegRegion (dst);
1976+
1977+ if (topdcl)
1978+ localLR = gra.getLocalLR (topdcl);
1979+ if (localLR && localLR->getAssigned ()) {
1980+ int sreg;
1981+ G4_VarBase *preg = localLR->getPhyReg (sreg);
1982+
1983+ vISA_ASSERT (preg->isGreg (), " Register in dst was not GRF" );
1984+
1985+ isDstLocallyAssigned = true ;
1986+ dstPreg = preg->asGreg ()->getRegNum ();
1987+ dstNumRows = localLR->getTopDcl ()->getNumRows ();
1988+
1989+ unsigned dstGRFByteSizeWithOffset =
1990+ dst->getSubRegOff () * dst->getTypeSize () + dst->getLinearizedEnd () -
1991+ dst->getLinearizedStart () + 1 ;
1992+ dstMoreThan1GRF =
1993+ dstGRFByteSizeWithOffset > kernel.numEltPerGRF <Type_UB>();
1994+ dstMoreThan2GRF =
1995+ dstGRFByteSizeWithOffset > (2 * kernel.numEltPerGRF <Type_UB>());
1996+
1997+ isDstEvenAlign = (dstPreg % 2 == 0 );
1998+ isDstQuadAlign = (dstPreg % 4 == 0 );
1999+ }
2000+ }
2001+
2002+ if (!isDstRegAllocPartaker && !isDstLocallyAssigned) {
2003+ return ;
2004+ }
19762005
1977- vISA_ASSERT (preg->isGreg (), " Register in dst was not GRF" );
2006+ for (unsigned j = 0 , numSrc = inst->getNumSrc (); j < numSrc; j++) {
2007+ if (inst->isDpas () && j != 1 )
2008+ continue ;
2009+ G4_Operand *src = inst->getSrc (j);
19782010
1979- isDstLocallyAssigned = true ;
1980- dstPreg = preg-> asGreg ()-> getRegNum () ;
1981- dstNumRows = localLR-> getTopDcl ()-> getNumRows ();
1982- dstOpndNumRows = dst-> getSubRegOff () * dst-> getTypeSize () +
1983- dst-> getLinearizedEnd () - dst-> getLinearizedStart () + 1 >
1984- kernel. numEltPerGRF <Type_UB>();
1985- isDstEvenAlign = (dstPreg % 2 == 0 );
1986- }
2011+ if (src == NULL ) {
2012+ continue ;
2013+ }
2014+ if (!src-> isSrcRegRegion ()) {
2015+ continue ;
2016+ }
2017+ if (!src-> asSrcRegRegion ()-> getBase ()-> isRegVar ()) {
2018+ continue ;
19872019 }
19882020
1989- if (isDstRegAllocPartaker || isDstLocallyAssigned) {
1990- for (unsigned j = 0 , numSrc = inst->getNumSrc (); j < numSrc; j++) {
1991- if (inst->isDpas () && j != 1 )
1992- continue ;
1993- G4_Operand *src = inst->getSrc (j);
1994- if (src != NULL && src->isSrcRegRegion () &&
1995- src->asSrcRegRegion ()->getBase ()->isRegVar ()) {
1996- G4_SrcRegRegion *srcRgn = src->asSrcRegRegion ();
1997- G4_Declare *srcDcl = src->getTopDcl ();
1998- if (srcRgn->getRegAccess () == Direct &&
1999- (src->getTopDcl ()->getRegFile () == G4_GRF ||
2000- src->getTopDcl ()->getRegFile () == G4_INPUT)) {
2001- int srcOffset =
2002- src->getLeftBound () / kernel.numEltPerGRF <Type_UB>();
2003- bool srcOpndNumRows =
2004- srcRgn->getSubRegOff () * srcRgn->getTypeSize () +
2005- srcRgn->getLinearizedEnd () -
2006- srcRgn->getLinearizedStart () + 1 >
2007- kernel.numEltPerGRF <Type_UB>();
2008-
2009- int srcReg = 0 ;
2010- bool isSrcEvenAlign = gra.isEvenAligned (srcDcl);
2011- if (!src->asSrcRegRegion ()->getBase ()->isRegAllocPartaker () &&
2012- gra.useLocalRA ) {
2013- int sreg;
2014- LocalLiveRange *localLR = NULL ;
2015- G4_Declare *topdcl = GetTopDclFromRegRegion (src);
2021+ G4_SrcRegRegion *srcRgn = src->asSrcRegRegion ();
2022+ G4_Declare *srcDcl = src->getTopDcl ();
2023+ if (srcRgn->getRegAccess () == Direct &&
2024+ (src->getTopDcl ()->getRegFile () == G4_GRF ||
2025+ src->getTopDcl ()->getRegFile () == G4_INPUT)) {
2026+ int srcOffset = src->getLeftBound () / kernel.numEltPerGRF <Type_UB>();
2027+ unsigned srcGRFByteSizeWithOffset =
2028+ srcRgn->getSubRegOff () * srcRgn->getTypeSize () +
2029+ srcRgn->getLinearizedEnd () - srcRgn->getLinearizedStart () + 1 ;
20162030
2017- if (topdcl)
2018- localLR = gra. getLocalLR (topdcl );
2019- if (localLR && localLR-> getAssigned ()) {
2020- G4_VarBase *preg = localLR-> getPhyReg (sreg );
2031+ bool srcMoreThan1GRF =
2032+ srcGRFByteSizeWithOffset > kernel. numEltPerGRF <Type_UB>( );
2033+ bool srcMoreThan2GRF =
2034+ srcGRFByteSizeWithOffset > ( 2 * kernel. numEltPerGRF <Type_UB>() );
20212035
2022- vISA_ASSERT (preg->isGreg (), " Register in src was not GRF" );
2023- srcReg = preg->asGreg ()->getRegNum ();
2024- isSrcEvenAlign = (srcReg % 2 == 0 );
2025- }
2026- }
2036+ int srcReg = 0 ;
2037+ bool isSrcEvenAlign = gra.isEvenAligned (srcDcl);
2038+ bool isSrcQuadAlign = gra.isQuadAligned (srcDcl);
20272039
2028- if (srcDcl->getRegFile () == G4_INPUT &&
2029- srcDcl->getRegVar ()->getPhyReg () != NULL &&
2030- srcDcl->getRegVar ()->getPhyReg ()->isGreg ()) {
2031- srcReg = srcDcl->getRegVar ()->getPhyReg ()->asGreg ()->getRegNum ();
2032- isSrcEvenAlign = (srcReg % 2 == 0 );
2033- }
2040+ if (!src->asSrcRegRegion ()->getBase ()->isRegAllocPartaker () &&
2041+ gra.useLocalRA ) {
2042+ int sreg;
2043+ LocalLiveRange *localLR = NULL ;
2044+ G4_Declare *topdcl = GetTopDclFromRegRegion (src);
20342045
2035- if (dstOpndNumRows || srcOpndNumRows) {
2036- if (!(isDstEvenAlign && isSrcEvenAlign &&
2037- srcOffset % 2 == dstOffset % 2 && dstOpndNumRows &&
2038- srcOpndNumRows)) {
2039- if (src->asSrcRegRegion ()->getBase ()->isRegAllocPartaker ()) {
2040- unsigned srcId =
2041- src->asSrcRegRegion ()->getBase ()->asRegVar ()->getId ();
2046+ if (topdcl)
2047+ localLR = gra.getLocalLR (topdcl);
2048+ if (localLR && localLR->getAssigned ()) {
2049+ G4_VarBase *preg = localLR->getPhyReg (sreg);
2050+
2051+ vISA_ASSERT (preg->isGreg (), " Register in src was not GRF" );
2052+ srcReg = preg->asGreg ()->getRegNum ();
2053+ isSrcEvenAlign = (srcReg % 2 == 0 );
2054+ isSrcQuadAlign = (srcReg % 4 == 0 );
2055+ }
2056+ }
2057+
2058+ if (srcDcl->getRegFile () == G4_INPUT &&
2059+ srcDcl->getRegVar ()->getPhyReg () != NULL &&
2060+ srcDcl->getRegVar ()->getPhyReg ()->isGreg ()) {
2061+ srcReg = srcDcl->getRegVar ()->getPhyReg ()->asGreg ()->getRegNum ();
2062+ isSrcEvenAlign = (srcReg % 2 == 0 );
2063+ isSrcQuadAlign = (srcReg % 4 == 0 );
2064+ }
2065+
2066+ // if 1 GRF or less, no need to avoid
2067+ // if > 1 GRF, even align is an exception
2068+ // if > 2 GRFs, quad align is an exception
2069+ bool canAvoidOverlap = false ;
2070+
2071+ if (builder.supports4GRFAlign () && (dstMoreThan2GRF || srcMoreThan2GRF)) {
2072+ if (isDstQuadAlign && isSrcQuadAlign &&
2073+ srcOffset % 4 == dstOffset % 4 && dstMoreThan2GRF &&
2074+ srcMoreThan2GRF) {
2075+ canAvoidOverlap = true ;
2076+ }
2077+ } else if (dstMoreThan1GRF || srcMoreThan1GRF) {
2078+ if (isDstEvenAlign && isSrcEvenAlign &&
2079+ srcOffset % 2 == dstOffset % 2 && dstMoreThan1GRF &&
2080+ srcMoreThan1GRF) {
2081+ canAvoidOverlap = true ;
2082+ }
2083+ } else { // 1 GRF or less
2084+ canAvoidOverlap = true ;
2085+ }
2086+
2087+ if (!canAvoidOverlap) {
2088+ if (src->asSrcRegRegion ()->getBase ()->isRegAllocPartaker ()) {
2089+ unsigned srcId =
2090+ src->asSrcRegRegion ()->getBase ()->asRegVar ()->getId ();
20422091#ifdef DEBUG_VERBOSE_ON
2043- printf (" Src%d " , j);
2044- inst->dump ();
2092+ printf (" Src%d " , j);
2093+ inst->dump ();
20452094#endif
2046- if (isDstRegAllocPartaker) {
2047- if (!varSplitCheckBeforeIntf (dstId, srcId)) {
2048- checkAndSetIntf (dstId, srcId);
2049- buildInterferenceWithAllSubDcl (dstId, srcId);
2050- }
2051- } else {
2052- for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum;
2053- j++) {
2054- int k = getGRFDclForHRA (j)->getRegVar ()->getId ();
2055- if (!varSplitCheckBeforeIntf (k, srcId)) {
2056- checkAndSetIntf (k, srcId);
2057- buildInterferenceWithAllSubDcl (k, srcId);
2058- }
2059- }
2060- }
2061- } else if (gra.useLocalRA &&
2062- isDstRegAllocPartaker) {
2063- LocalLiveRange *localLR = NULL ;
2064- G4_Declare *topdcl = GetTopDclFromRegRegion (src);
2095+ if (isDstRegAllocPartaker) {
2096+ if (!varSplitCheckBeforeIntf (dstId, srcId)) {
2097+ checkAndSetIntf (dstId, srcId);
2098+ buildInterferenceWithAllSubDcl (dstId, srcId);
2099+ }
2100+ } else {
2101+ for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum; j++) {
2102+ int k = getGRFDclForHRA (j)->getRegVar ()->getId ();
2103+ if (!varSplitCheckBeforeIntf (k, srcId)) {
2104+ checkAndSetIntf (k, srcId);
2105+ buildInterferenceWithAllSubDcl (k, srcId);
2106+ }
2107+ }
2108+ }
2109+ } else if (gra.useLocalRA && isDstRegAllocPartaker) {
2110+ LocalLiveRange *localLR = NULL ;
2111+ G4_Declare *topdcl = GetTopDclFromRegRegion (src);
20652112
2066- if (topdcl)
2067- localLR = gra.getLocalLR (topdcl);
2113+ if (topdcl)
2114+ localLR = gra.getLocalLR (topdcl);
20682115
2069- if (localLR && localLR->getAssigned ()) {
2070- int reg, sreg, numrows;
2071- G4_VarBase *preg = localLR->getPhyReg (sreg);
2072- numrows = localLR->getTopDcl ()->getNumRows ();
2116+ if (localLR && localLR->getAssigned ()) {
2117+ int reg, sreg, numrows;
2118+ G4_VarBase *preg = localLR->getPhyReg (sreg);
2119+ numrows = localLR->getTopDcl ()->getNumRows ();
20732120
2074- vISA_ASSERT (preg->isGreg (), " Register in src was not GRF" );
2121+ vISA_ASSERT (preg->isGreg (), " Register in src was not GRF" );
20752122
2076- reg = preg->asGreg ()->getRegNum ();
2123+ reg = preg->asGreg ()->getRegNum ();
20772124#ifdef DEBUG_VERBOSE_ON
2078- printf (" Src%d " , j);
2079- inst->dump ();
2125+ printf (" Src%d " , j);
2126+ inst->dump ();
20802127#endif
2081- for (int j = reg, sum = reg + numrows; j < sum; j++) {
2082- int k = getGRFDclForHRA (j)->getRegVar ()->getId ();
2083- if (!varSplitCheckBeforeIntf (dstId, k)) {
2084- checkAndSetIntf (dstId, k);
2085- buildInterferenceWithAllSubDcl (dstId, k);
2086- }
2087- }
2088- }
2089- }
2128+ for (int j = reg, sum = reg + numrows; j < sum; j++) {
2129+ int k = getGRFDclForHRA (j)->getRegVar ()->getId ();
2130+ if (!varSplitCheckBeforeIntf (dstId, k)) {
2131+ checkAndSetIntf (dstId, k);
2132+ buildInterferenceWithAllSubDcl (dstId, k);
20902133 }
20912134 }
2092- } else if (srcRgn->getRegAccess () == IndirGRF) {
2093- // make every var in points-to set live
2094- const REGVAR_VECTOR &pointsToSet =
2095- liveAnalysis->getPointsToAnalysis ().getAllInPointsToOrIndrUse (
2096- srcRgn, bb);
2097- for (auto &pt : pointsToSet) {
2098- if (pt.var ->isRegAllocPartaker ()) {
2099- unsigned srcId = pt.var ->getId ();
2100- if (isDstRegAllocPartaker) {
2101- if (!varSplitCheckBeforeIntf (dstId, srcId)) {
2102- checkAndSetIntf (dstId, srcId);
2103- buildInterferenceWithAllSubDcl (dstId, srcId);
2104- }
2105- } else {
2106- for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum;
2107- j++) {
2108- int k = getGRFDclForHRA (j)->getRegVar ()->getId ();
2109- if (!varSplitCheckBeforeIntf (k, srcId)) {
2110- checkAndSetIntf (k, srcId);
2111- buildInterferenceWithAllSubDcl (k, srcId);
2112- }
2113- }
2114- }
2115- }
2135+ }
2136+ }
2137+ }
2138+ } else if (srcRgn->getRegAccess () == IndirGRF) {
2139+ // make every var in points-to set live
2140+ const REGVAR_VECTOR &pointsToSet =
2141+ liveAnalysis->getPointsToAnalysis ().getAllInPointsToOrIndrUse (srcRgn,
2142+ bb);
2143+ for (auto &pt : pointsToSet) {
2144+ if (!pt.var ->isRegAllocPartaker ()) {
2145+ continue ;
2146+ }
2147+ unsigned srcId = pt.var ->getId ();
2148+ if (isDstRegAllocPartaker) {
2149+ if (!varSplitCheckBeforeIntf (dstId, srcId)) {
2150+ checkAndSetIntf (dstId, srcId);
2151+ buildInterferenceWithAllSubDcl (dstId, srcId);
2152+ }
2153+ } else {
2154+ for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum; j++) {
2155+ int k = getGRFDclForHRA (j)->getRegVar ()->getId ();
2156+ if (varSplitCheckBeforeIntf (k, srcId)) {
2157+ continue ;
21162158 }
2159+ checkAndSetIntf (k, srcId);
2160+ buildInterferenceWithAllSubDcl (k, srcId);
21172161 }
21182162 }
21192163 }
21202164 }
21212165 }
2166+
2167+ return ;
21222168}
21232169
21242170uint32_t GlobalRA::getRefCount (int loopNestLevel) {
0 commit comments