Skip to content

Commit 8955b78

Browse files
sys-igcigcbot
authored and committed
[Autobackout][FunctionalRegression]Revert of change: b3e1d9a: IGA SWSB: Refactor dpas macro builder
Removed DpasMacroBuilder::getSuppressionBlockCandidate. Now the dpas macro is formed until a dpas is seen that cannot be in a macro, even if there is no suppression opportunity, i.e. no sources are the same within the macro. There is no performance drawback doing so. This also aligns with vISA's dpas macro logic.
1 parent e31e178 commit 8955b78

File tree

2 files changed

+99
-7
lines changed

2 files changed

+99
-7
lines changed

visa/iga/IGALibrary/IR/RegDeps.cpp

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -589,12 +589,19 @@ size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
589589
BitSet<> allSrcBits(m_dsBuilder.getGRF_LEN());
590590
BitSet<> allDstNoLastBits(m_dsBuilder.getGRF_LEN());
591591
BitSet<> allSrcNoLastBits(m_dsBuilder.getGRF_LEN());
592-
593-
SuppressBlock bptr(
594-
getNumberOfSuppresionGroups(srcIdx), srcIdx == 1 ? 8 : 4);
592+
SuppressBlockPtrTy bptr =
593+
getSuppressionBlockCandidate(startIt, srcIdx, allDstBits, allSrcBits,
594+
allDstNoLastBits, allSrcNoLastBits);
595+
if (!bptr)
596+
return 0;
595597

596598
size_t numSuppressed = 0;
597599
InstListIterator it = startIt;
600+
// advance inst iterator to the next instruction following the block
601+
// Note that this instruction must be a macro candidate, otherwise the
602+
// suppression block won't be formed
603+
std::advance(it, bptr->size());
604+
assert(it != m_instList.end());
598605

599606

600607
// find until the last instruction that can be suppressed
@@ -615,14 +622,18 @@ size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
615622
if (srcOp.getDirRegName() != RegName::GRF_R)
616623
break;
617624

625+
// found the first instruction that can't be suppressed. Stop looking.
626+
if (!bptr->contains(srcOp.getDirRegRef().regNum))
627+
break;
628+
618629
bool skipSetLastBits = false;
619630
if (hasProducerConsumerDep(dst_range, src_range, allDstBits)) {
620631
break;
621632
}
622633

623634
// at this point, we can add this DPAS into the macro
624635
++numSuppressed;
625-
bptr.addRegRanges(src_range, src_extra_range, dst_range);
636+
bptr->addRegRanges(src_range, src_extra_range, dst_range);
626637
if (!skipSetLastBits) {
627638
allSrcNoLastBits = allSrcBits;
628639
allDstNoLastBits = allDstBits;
@@ -639,15 +650,76 @@ size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
639650
if (numSuppressed) {
640651
// at least one instruction can be suppressed, the candidate block can be in
641652
// the macro; update the register footprint into DepSet
642-
updateRegFootprintsToDepSets(bptr.allSrcRange, bptr.allExtraSrcRange,
643-
bptr.allDstRange);
653+
updateRegFootprintsToDepSets(bptr->allSrcRange, bptr->allExtraSrcRange,
654+
bptr->allDstRange);
644655

645656
// return the total instructions found can be in the macro
646-
return bptr.size() + numSuppressed;
657+
return bptr->size() + numSuppressed;
647658
}
648659
return 0;
649660
}
650661

662+
// Builds the candidate suppression block for source operand `srcIdx` (1 or 2),
// scanning the instruction list forward from `startIt`.
//
// Returns nullptr when the group limit is zero, when the scanned instruction
// has no successor in m_instList, or when a scanned instruction fails any of
// the checks in the loop below. Otherwise returns the block built from the
// instructions that were added.
//
// * forceGroupNum - when non-negative it is used as the group limit instead of
//   the value from getNumberOfSuppresionGroups(srcIdx).
// * allSrcBits, allDstBits - updated through setDstSrcBits() for every
//   instruction added to the block.
// * allSrcNoLastBits, allDstNoLastBits - receive the values allSrcBits and
//   allDstBits held just before the most recently added instruction.
// Note that the four bit sets may already have been modified by earlier loop
// iterations when nullptr is returned.
DepSetBuilder::DpasMacroBuilder::SuppressBlockPtrTy
663+
DepSetBuilder::DpasMacroBuilder::getSuppressionBlockCandidate(
664+
InstListIterator startIt, uint32_t srcIdx, BitSet<> &allDstBits,
665+
BitSet<> &allSrcBits, BitSet<> &allDstNoLastBits,
666+
BitSet<> &allSrcNoLastBits, int forceGroupNum) const {
667+
// only src1 and src2 are accepted here
assert(srcIdx == 1 || srcIdx == 2);
668+
// a negative forceGroupNum means "use the limit for this srcIdx"
size_t maxGroupNum =
669+
forceGroupNum < 0 ? getNumberOfSuppresionGroups(srcIdx) : forceGroupNum;
670+
// a group limit of zero means the given src can't be suppressed: return null
671+
if (!maxGroupNum)
672+
return nullptr;
673+
674+
// the second constructor argument is 8 for src1 and 4 for src2
// NOTE(review): its meaning is defined by SuppressBlock, which is not visible
// here - confirm before relying on it
SuppressBlockPtrTy sb(new SuppressBlock(maxGroupNum, srcIdx == 1 ? 8 : 4));
675+
// Starting from startIt, check whether there is a dpas sequence that can form
676+
// the suppression block. At most maxGroupNum instructions are examined to find
677+
// the first block whose registers can potentially be suppressed.
678+
InstListIterator it = startIt;
679+
for (size_t i = 0; i < maxGroupNum; ++i) {
680+
InstListIterator nextIt = it;
681+
++nextIt;
682+
// if the current instruction has no successor, or the successor cannot be
683+
// in the macro, there's no chance to form a suppression block: return nullptr
684+
if (nextIt == m_instList.end())
685+
return nullptr;
686+
if (nextIsNotMacroCandidate(**it, **nextIt))
687+
return nullptr;
688+
// the current instruction's src must itself be a suppression candidate
if (!srcIsSuppressCandidate(**it, srcIdx))
689+
return nullptr;
690+
// collect the register ranges read and written by the current instruction
SrcRegRangeType src_range, src_extra_range;
691+
DstRegRangeType dst_range;
692+
m_inps.getDpasSrcDependency(**it, src_range, src_extra_range, m_model);
693+
m_inps.getDpasDstDependency(**it, dst_range);
694+
// the last argument tells hasInternalDep whether the systolic depth is 8
if (hasInternalDep(**it, dst_range, src_range,
695+
GetDpasSystolicDepth((*it)->getDpasFc()) == 8))
696+
return nullptr;
697+
698+
// never set to true in this function, so the "NoLast" snapshot below is
// always taken
bool skipSetLastBits = false;
699+
// presumably a dependency between this instruction and the destinations
// already accumulated in allDstBits - see hasProducerConsumerDep
if (hasProducerConsumerDep(dst_range, src_range, allDstBits)) {
700+
return nullptr;
701+
}
702+
// register number of the source operand being considered for suppression
uint16_t reg = (*it)->getSource(srcIdx).getDirRegRef().regNum;
703+
if (sb->partialOverlapped(reg))
704+
return nullptr;
705+
706+
// found the first duplicated register: the block is formed
707+
if (sb->contains(reg))
708+
break;
709+
// add the current instruction's register and ranges to the block
sb->addRegs(reg);
710+
sb->addRegRanges(src_range, src_extra_range, dst_range);
711+
// snapshot the accumulated bits before this instruction's bits are merged
if (!skipSetLastBits) {
712+
allSrcNoLastBits = allSrcBits;
713+
allDstNoLastBits = allDstBits;
714+
}
715+
setDstSrcBits(src_range, dst_range, allSrcBits, allDstBits);
716+
++it;
717+
}
718+
719+
// the block is expected to be non-empty at this point
assert(sb->size());
720+
return sb;
721+
}
722+
651723
bool DepSetBuilder::DpasMacroBuilder::srcIsSuppressCandidate(
652724
const Instruction &inst, uint32_t srcIdx) const {
653725
// src1 always can be the candidate since all dpas depth must be the same

visa/iga/IGALibrary/IR/RegDeps.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ class DepSetBuilder {
596596
}
597597

598598
}; // SuppressionBlock
599+
typedef std::unique_ptr<SuppressBlock> SuppressBlockPtrTy;
599600

600601
// get the max number of suppression groups according to srcIdx and platform
601602
size_t getNumberOfSuppresionGroups(uint32_t srcIdx) const;
@@ -605,6 +606,25 @@ class DepSetBuilder {
605606
// instructions found
606607
size_t formSrcSuppressionBlock(InstListIterator startIt, uint32_t srcIdx);
607608

609+
// Returns the candidate SuppressBlock that fulfils the read-suppression
610+
// requirement of the given src index, starting from the given
611+
// instruction. This block is the first candidate block of instructions
612+
// whose registers can be suppressed. The caller still needs to check whether
613+
// the following instructions use the same registers, so that they can actually
614+
// be suppressed.
615+
// * returns nullptr if there is no chance to suppress the given src
616+
// * allDstBits, allSrcBits - all used grf bits in the returned SuppressBlock
617+
// * allDstNoLastBits, allSrcNoLastBits - all used grf bits in the returned
618+
// SuppressBlock except the
619+
// last instruction's
620+
// * forceGroupNum - forces the given value to be used as the maximum number of
621+
// suppression groups instead of
622+
// getting it from getNumberOfSuppresionGroups
623+
SuppressBlockPtrTy getSuppressionBlockCandidate(
624+
InstListIterator startIt, uint32_t srcIdx, BitSet<> &allDstBits,
625+
BitSet<> &allSrcBits, BitSet<> &allDstNoLastBits,
626+
BitSet<> &allSrcNoLastBits, int forceGroupNum = -1) const;
627+
608628
bool srcIsSuppressCandidate(const Instruction &inst, uint32_t srcIdx) const;
609629

610630
private:

0 commit comments

Comments
 (0)