Skip to content

Commit 2b8f8d6

Browse files
bcheng0127 authored and igcbot committed
Changes in code.
1 parent 8d812d1 commit 2b8f8d6

File tree

2 files changed

+194
-145
lines changed

2 files changed

+194
-145
lines changed

visa/GraphColor.cpp

Lines changed: 189 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -1948,177 +1948,223 @@ void Interference::markInterferenceToAvoidDstSrcOverlap(G4_BB *bb,
19481948
bool isDstLocallyAssigned = false;
19491949
unsigned dstId = 0;
19501950
int dstPreg = 0, dstNumRows = 0;
1951-
bool dstOpndNumRows = false;
1951+
bool dstMoreThan1GRF = false;
1952+
bool dstMoreThan2GRF = false;
19521953

19531954
G4_DstRegRegion *dst = inst->getDst();
1954-
if (dst->getBase()->isRegVar() &&
1955-
(dst->getTopDcl()->getRegFile() == G4_GRF)) {
1956-
G4_Declare *dstDcl = dst->getTopDcl();
1957-
int dstOffset = dst->getLeftBound() / kernel.numEltPerGRF<Type_UB>();
1958-
bool isDstEvenAlign = gra.isEvenAligned(dstDcl);
1955+
if (!dst->getBase()->isRegVar() || dst->getTopDcl()->getRegFile() != G4_GRF) {
1956+
return;
1957+
}
19591958

1960-
if (dst->getBase()->isRegAllocPartaker()) {
1961-
isDstRegAllocPartaker = true;
1962-
dstId = ((G4_RegVar *)dst->getBase())->getId();
1963-
dstOpndNumRows = dst->getSubRegOff() * dst->getTypeSize() +
1964-
dst->getLinearizedEnd() - dst->getLinearizedStart() +
1965-
1 >
1966-
kernel.numEltPerGRF<Type_UB>();
1967-
} else if (gra.useLocalRA) {
1968-
LocalLiveRange *localLR = NULL;
1969-
G4_Declare *topdcl = GetTopDclFromRegRegion(dst);
1959+
G4_Declare *dstDcl = dst->getTopDcl();
1960+
int dstOffset = dst->getLeftBound() / kernel.numEltPerGRF<Type_UB>();
1961+
bool isDstEvenAlign = gra.isEvenAligned(dstDcl);
1962+
bool isDstQuadAlign = gra.isQuadAligned(dstDcl);
19701963

1971-
if (topdcl)
1972-
localLR = gra.getLocalLR(topdcl);
1973-
if (localLR && localLR->getAssigned()) {
1974-
int sreg;
1975-
G4_VarBase *preg = localLR->getPhyReg(sreg);
1964+
if (dst->getBase()->isRegAllocPartaker()) {
1965+
isDstRegAllocPartaker = true;
1966+
dstId = ((G4_RegVar *)dst->getBase())->getId();
1967+
unsigned dstGRFByteSizeWithOffset =
1968+
dst->getSubRegOff() * dst->getTypeSize() + dst->getLinearizedEnd() -
1969+
dst->getLinearizedStart() + 1;
1970+
dstMoreThan1GRF = dstGRFByteSizeWithOffset > kernel.numEltPerGRF<Type_UB>();
1971+
dstMoreThan2GRF =
1972+
dstGRFByteSizeWithOffset > (2 * kernel.numEltPerGRF<Type_UB>());
1973+
} else if (gra.useLocalRA) {
1974+
LocalLiveRange *localLR = NULL;
1975+
G4_Declare *topdcl = GetTopDclFromRegRegion(dst);
1976+
1977+
if (topdcl)
1978+
localLR = gra.getLocalLR(topdcl);
1979+
if (localLR && localLR->getAssigned()) {
1980+
int sreg;
1981+
G4_VarBase *preg = localLR->getPhyReg(sreg);
1982+
1983+
vISA_ASSERT(preg->isGreg(), "Register in dst was not GRF");
1984+
1985+
isDstLocallyAssigned = true;
1986+
dstPreg = preg->asGreg()->getRegNum();
1987+
dstNumRows = localLR->getTopDcl()->getNumRows();
1988+
1989+
unsigned dstGRFByteSizeWithOffset =
1990+
dst->getSubRegOff() * dst->getTypeSize() + dst->getLinearizedEnd() -
1991+
dst->getLinearizedStart() + 1;
1992+
dstMoreThan1GRF =
1993+
dstGRFByteSizeWithOffset > kernel.numEltPerGRF<Type_UB>();
1994+
dstMoreThan2GRF =
1995+
dstGRFByteSizeWithOffset > (2 * kernel.numEltPerGRF<Type_UB>());
1996+
1997+
isDstEvenAlign = (dstPreg % 2 == 0);
1998+
isDstQuadAlign = (dstPreg % 4 == 0);
1999+
}
2000+
}
2001+
2002+
if (!isDstRegAllocPartaker && !isDstLocallyAssigned) {
2003+
return;
2004+
}
19762005

1977-
vISA_ASSERT(preg->isGreg(), "Register in dst was not GRF");
2006+
for (unsigned j = 0, numSrc = inst->getNumSrc(); j < numSrc; j++) {
2007+
if (inst->isDpas() && j != 1)
2008+
continue;
2009+
G4_Operand *src = inst->getSrc(j);
19782010

1979-
isDstLocallyAssigned = true;
1980-
dstPreg = preg->asGreg()->getRegNum();
1981-
dstNumRows = localLR->getTopDcl()->getNumRows();
1982-
dstOpndNumRows = dst->getSubRegOff() * dst->getTypeSize() +
1983-
dst->getLinearizedEnd() - dst->getLinearizedStart() + 1 >
1984-
kernel.numEltPerGRF<Type_UB>();
1985-
isDstEvenAlign = (dstPreg % 2 == 0);
1986-
}
2011+
if (src == NULL) {
2012+
continue;
2013+
}
2014+
if (!src->isSrcRegRegion()) {
2015+
continue;
2016+
}
2017+
if (!src->asSrcRegRegion()->getBase()->isRegVar()) {
2018+
continue;
19872019
}
19882020

1989-
if (isDstRegAllocPartaker || isDstLocallyAssigned) {
1990-
for (unsigned j = 0, numSrc = inst->getNumSrc(); j < numSrc; j++) {
1991-
if (inst->isDpas() && j != 1)
1992-
continue;
1993-
G4_Operand *src = inst->getSrc(j);
1994-
if (src != NULL && src->isSrcRegRegion() &&
1995-
src->asSrcRegRegion()->getBase()->isRegVar()) {
1996-
G4_SrcRegRegion *srcRgn = src->asSrcRegRegion();
1997-
G4_Declare *srcDcl = src->getTopDcl();
1998-
if (srcRgn->getRegAccess() == Direct &&
1999-
(src->getTopDcl()->getRegFile() == G4_GRF ||
2000-
src->getTopDcl()->getRegFile() == G4_INPUT)) {
2001-
int srcOffset =
2002-
src->getLeftBound() / kernel.numEltPerGRF<Type_UB>();
2003-
bool srcOpndNumRows =
2004-
srcRgn->getSubRegOff() * srcRgn->getTypeSize() +
2005-
srcRgn->getLinearizedEnd() -
2006-
srcRgn->getLinearizedStart() + 1 >
2007-
kernel.numEltPerGRF<Type_UB>();
2008-
2009-
int srcReg = 0;
2010-
bool isSrcEvenAlign = gra.isEvenAligned(srcDcl);
2011-
if (!src->asSrcRegRegion()->getBase()->isRegAllocPartaker() &&
2012-
gra.useLocalRA) {
2013-
int sreg;
2014-
LocalLiveRange *localLR = NULL;
2015-
G4_Declare *topdcl = GetTopDclFromRegRegion(src);
2021+
G4_SrcRegRegion *srcRgn = src->asSrcRegRegion();
2022+
G4_Declare *srcDcl = src->getTopDcl();
2023+
if (srcRgn->getRegAccess() == Direct &&
2024+
(src->getTopDcl()->getRegFile() == G4_GRF ||
2025+
src->getTopDcl()->getRegFile() == G4_INPUT)) {
2026+
int srcOffset = src->getLeftBound() / kernel.numEltPerGRF<Type_UB>();
2027+
unsigned srcGRFByteSizeWithOffset =
2028+
srcRgn->getSubRegOff() * srcRgn->getTypeSize() +
2029+
srcRgn->getLinearizedEnd() - srcRgn->getLinearizedStart() + 1;
20162030

2017-
if (topdcl)
2018-
localLR = gra.getLocalLR(topdcl);
2019-
if (localLR && localLR->getAssigned()) {
2020-
G4_VarBase *preg = localLR->getPhyReg(sreg);
2031+
bool srcMoreThan1GRF =
2032+
srcGRFByteSizeWithOffset > kernel.numEltPerGRF<Type_UB>();
2033+
bool srcMoreThan2GRF =
2034+
srcGRFByteSizeWithOffset > (2 * kernel.numEltPerGRF<Type_UB>());
20212035

2022-
vISA_ASSERT(preg->isGreg(), "Register in src was not GRF");
2023-
srcReg = preg->asGreg()->getRegNum();
2024-
isSrcEvenAlign = (srcReg % 2 == 0);
2025-
}
2026-
}
2036+
int srcReg = 0;
2037+
bool isSrcEvenAlign = gra.isEvenAligned(srcDcl);
2038+
bool isSrcQuadAlign = gra.isQuadAligned(srcDcl);
20272039

2028-
if (srcDcl->getRegFile() == G4_INPUT &&
2029-
srcDcl->getRegVar()->getPhyReg() != NULL &&
2030-
srcDcl->getRegVar()->getPhyReg()->isGreg()) {
2031-
srcReg = srcDcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
2032-
isSrcEvenAlign = (srcReg % 2 == 0);
2033-
}
2040+
if (!src->asSrcRegRegion()->getBase()->isRegAllocPartaker() &&
2041+
gra.useLocalRA) {
2042+
int sreg;
2043+
LocalLiveRange *localLR = NULL;
2044+
G4_Declare *topdcl = GetTopDclFromRegRegion(src);
20342045

2035-
if (dstOpndNumRows || srcOpndNumRows) {
2036-
if (!(isDstEvenAlign && isSrcEvenAlign &&
2037-
srcOffset % 2 == dstOffset % 2 && dstOpndNumRows &&
2038-
srcOpndNumRows)) {
2039-
if (src->asSrcRegRegion()->getBase()->isRegAllocPartaker()) {
2040-
unsigned srcId =
2041-
src->asSrcRegRegion()->getBase()->asRegVar()->getId();
2046+
if (topdcl)
2047+
localLR = gra.getLocalLR(topdcl);
2048+
if (localLR && localLR->getAssigned()) {
2049+
G4_VarBase *preg = localLR->getPhyReg(sreg);
2050+
2051+
vISA_ASSERT(preg->isGreg(), "Register in src was not GRF");
2052+
srcReg = preg->asGreg()->getRegNum();
2053+
isSrcEvenAlign = (srcReg % 2 == 0);
2054+
isSrcQuadAlign = (srcReg % 4 == 0);
2055+
}
2056+
}
2057+
2058+
if (srcDcl->getRegFile() == G4_INPUT &&
2059+
srcDcl->getRegVar()->getPhyReg() != NULL &&
2060+
srcDcl->getRegVar()->getPhyReg()->isGreg()) {
2061+
srcReg = srcDcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
2062+
isSrcEvenAlign = (srcReg % 2 == 0);
2063+
isSrcQuadAlign = (srcReg % 4 == 0);
2064+
}
2065+
2066+
// if 1 GRF or less, no need to avoid
2067+
// if > 1 GRF, even align is an exception
2068+
// if > 2 GRFs, quad align is an exception
2069+
bool canAvoidOverlap = false;
2070+
2071+
if (builder.supports4GRFAlign() && (dstMoreThan2GRF || srcMoreThan2GRF)) {
2072+
if (isDstQuadAlign && isSrcQuadAlign &&
2073+
srcOffset % 4 == dstOffset % 4 && dstMoreThan2GRF &&
2074+
srcMoreThan2GRF) {
2075+
canAvoidOverlap = true;
2076+
}
2077+
} else if (dstMoreThan1GRF || srcMoreThan1GRF) {
2078+
if (isDstEvenAlign && isSrcEvenAlign &&
2079+
srcOffset % 2 == dstOffset % 2 && dstMoreThan1GRF &&
2080+
srcMoreThan1GRF) {
2081+
canAvoidOverlap = true;
2082+
}
2083+
} else { // 1 GRF or less
2084+
canAvoidOverlap = true;
2085+
}
2086+
2087+
if (!canAvoidOverlap) {
2088+
if (src->asSrcRegRegion()->getBase()->isRegAllocPartaker()) {
2089+
unsigned srcId =
2090+
src->asSrcRegRegion()->getBase()->asRegVar()->getId();
20422091
#ifdef DEBUG_VERBOSE_ON
2043-
printf("Src%d ", j);
2044-
inst->dump();
2092+
printf("Src%d ", j);
2093+
inst->dump();
20452094
#endif
2046-
if (isDstRegAllocPartaker) {
2047-
if (!varSplitCheckBeforeIntf(dstId, srcId)) {
2048-
checkAndSetIntf(dstId, srcId);
2049-
buildInterferenceWithAllSubDcl(dstId, srcId);
2050-
}
2051-
} else {
2052-
for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum;
2053-
j++) {
2054-
int k = getGRFDclForHRA(j)->getRegVar()->getId();
2055-
if (!varSplitCheckBeforeIntf(k, srcId)) {
2056-
checkAndSetIntf(k, srcId);
2057-
buildInterferenceWithAllSubDcl(k, srcId);
2058-
}
2059-
}
2060-
}
2061-
} else if (gra.useLocalRA &&
2062-
isDstRegAllocPartaker) {
2063-
LocalLiveRange *localLR = NULL;
2064-
G4_Declare *topdcl = GetTopDclFromRegRegion(src);
2095+
if (isDstRegAllocPartaker) {
2096+
if (!varSplitCheckBeforeIntf(dstId, srcId)) {
2097+
checkAndSetIntf(dstId, srcId);
2098+
buildInterferenceWithAllSubDcl(dstId, srcId);
2099+
}
2100+
} else {
2101+
for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum; j++) {
2102+
int k = getGRFDclForHRA(j)->getRegVar()->getId();
2103+
if (!varSplitCheckBeforeIntf(k, srcId)) {
2104+
checkAndSetIntf(k, srcId);
2105+
buildInterferenceWithAllSubDcl(k, srcId);
2106+
}
2107+
}
2108+
}
2109+
} else if (gra.useLocalRA && isDstRegAllocPartaker) {
2110+
LocalLiveRange *localLR = NULL;
2111+
G4_Declare *topdcl = GetTopDclFromRegRegion(src);
20652112

2066-
if (topdcl)
2067-
localLR = gra.getLocalLR(topdcl);
2113+
if (topdcl)
2114+
localLR = gra.getLocalLR(topdcl);
20682115

2069-
if (localLR && localLR->getAssigned()) {
2070-
int reg, sreg, numrows;
2071-
G4_VarBase *preg = localLR->getPhyReg(sreg);
2072-
numrows = localLR->getTopDcl()->getNumRows();
2116+
if (localLR && localLR->getAssigned()) {
2117+
int reg, sreg, numrows;
2118+
G4_VarBase *preg = localLR->getPhyReg(sreg);
2119+
numrows = localLR->getTopDcl()->getNumRows();
20732120

2074-
vISA_ASSERT(preg->isGreg(), "Register in src was not GRF");
2121+
vISA_ASSERT(preg->isGreg(), "Register in src was not GRF");
20752122

2076-
reg = preg->asGreg()->getRegNum();
2123+
reg = preg->asGreg()->getRegNum();
20772124
#ifdef DEBUG_VERBOSE_ON
2078-
printf("Src%d ", j);
2079-
inst->dump();
2125+
printf("Src%d ", j);
2126+
inst->dump();
20802127
#endif
2081-
for (int j = reg, sum = reg + numrows; j < sum; j++) {
2082-
int k = getGRFDclForHRA(j)->getRegVar()->getId();
2083-
if (!varSplitCheckBeforeIntf(dstId, k)) {
2084-
checkAndSetIntf(dstId, k);
2085-
buildInterferenceWithAllSubDcl(dstId, k);
2086-
}
2087-
}
2088-
}
2089-
}
2128+
for (int j = reg, sum = reg + numrows; j < sum; j++) {
2129+
int k = getGRFDclForHRA(j)->getRegVar()->getId();
2130+
if (!varSplitCheckBeforeIntf(dstId, k)) {
2131+
checkAndSetIntf(dstId, k);
2132+
buildInterferenceWithAllSubDcl(dstId, k);
20902133
}
20912134
}
2092-
} else if (srcRgn->getRegAccess() == IndirGRF) {
2093-
// make every var in points-to set live
2094-
const REGVAR_VECTOR &pointsToSet =
2095-
liveAnalysis->getPointsToAnalysis().getAllInPointsToOrIndrUse(
2096-
srcRgn, bb);
2097-
for (auto &pt : pointsToSet) {
2098-
if (pt.var->isRegAllocPartaker()) {
2099-
unsigned srcId = pt.var->getId();
2100-
if (isDstRegAllocPartaker) {
2101-
if (!varSplitCheckBeforeIntf(dstId, srcId)) {
2102-
checkAndSetIntf(dstId, srcId);
2103-
buildInterferenceWithAllSubDcl(dstId, srcId);
2104-
}
2105-
} else {
2106-
for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum;
2107-
j++) {
2108-
int k = getGRFDclForHRA(j)->getRegVar()->getId();
2109-
if (!varSplitCheckBeforeIntf(k, srcId)) {
2110-
checkAndSetIntf(k, srcId);
2111-
buildInterferenceWithAllSubDcl(k, srcId);
2112-
}
2113-
}
2114-
}
2115-
}
2135+
}
2136+
}
2137+
}
2138+
} else if (srcRgn->getRegAccess() == IndirGRF) {
2139+
// make every var in points-to set live
2140+
const REGVAR_VECTOR &pointsToSet =
2141+
liveAnalysis->getPointsToAnalysis().getAllInPointsToOrIndrUse(srcRgn,
2142+
bb);
2143+
for (auto &pt : pointsToSet) {
2144+
if (!pt.var->isRegAllocPartaker()) {
2145+
continue;
2146+
}
2147+
unsigned srcId = pt.var->getId();
2148+
if (isDstRegAllocPartaker) {
2149+
if (!varSplitCheckBeforeIntf(dstId, srcId)) {
2150+
checkAndSetIntf(dstId, srcId);
2151+
buildInterferenceWithAllSubDcl(dstId, srcId);
2152+
}
2153+
} else {
2154+
for (int j = dstPreg, sum = dstPreg + dstNumRows; j < sum; j++) {
2155+
int k = getGRFDclForHRA(j)->getRegVar()->getId();
2156+
if (varSplitCheckBeforeIntf(k, srcId)) {
2157+
continue;
21162158
}
2159+
checkAndSetIntf(k, srcId);
2160+
buildInterferenceWithAllSubDcl(k, srcId);
21172161
}
21182162
}
21192163
}
21202164
}
21212165
}
2166+
2167+
return;
21222168
}
21232169

21242170
uint32_t GlobalRA::getRefCount(int loopNestLevel) {

visa/SpillManagerGMRF.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,14 +1127,17 @@ SpillManagerGRF::createTemporaryRangeDeclare(G4_DstRegRegion *spilledRegion,
11271127
// non-zero
11281128
byteSize += spilledRegion->getSubRegOff() * spilledRegion->getElemSize();
11291129

1130-
vASSERT(byteSize <= 2u * builder_->numEltPerGRF<Type_UB>());
1130+
vASSERT(byteSize <= 4u * builder_->numEltPerGRF<Type_UB>());
11311131
vASSERT(byteSize % spilledRegion->getElemSize() == 0);
11321132

11331133
G4_Type type = spilledRegion->getType();
11341134
DeclareType regVarKind = DeclareType::Tmp;
11351135

11361136
unsigned short width, height;
1137-
if (byteSize > builder_->numEltPerGRF<Type_UB>()) {
1137+
if (byteSize > (2 * builder_->numEltPerGRF<Type_UB>())) {
1138+
height = 4;
1139+
width = builder_->numEltPerGRF<Type_UB>() / spilledRegion->getElemSize();
1140+
} else if (byteSize > builder_->numEltPerGRF<Type_UB>()) {
11381141
height = 2;
11391142
width = builder_->numEltPerGRF<Type_UB>() / spilledRegion->getElemSize();
11401143
} else {

0 commit comments

Comments
 (0)