@@ -589,12 +589,19 @@ size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
589589 BitSet<> allSrcBits (m_dsBuilder.getGRF_LEN ());
590590 BitSet<> allDstNoLastBits (m_dsBuilder.getGRF_LEN ());
591591 BitSet<> allSrcNoLastBits (m_dsBuilder.getGRF_LEN ());
592-
593- SuppressBlock bptr (
594- getNumberOfSuppresionGroups (srcIdx), srcIdx == 1 ? 8 : 4 );
592+ SuppressBlockPtrTy bptr =
593+ getSuppressionBlockCandidate (startIt, srcIdx, allDstBits, allSrcBits,
594+ allDstNoLastBits, allSrcNoLastBits);
595+ if (!bptr)
596+ return 0 ;
595597
596598 size_t numSuppressed = 0 ;
597599 InstListIterator it = startIt;
600+ // Advance the instruction iterator to the instruction following the block.
601+ // Note that this instruction must be a macro candidate; otherwise the
602+ // suppression block would not have been formed.
603+ std::advance (it, bptr->size ());
604+ assert (it != m_instList.end ());
598605
599606
600607 // find until the last instruction that can be suppressed
@@ -615,14 +622,18 @@ size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
615622 if (srcOp.getDirRegName () != RegName::GRF_R)
616623 break ;
617624
625+ // found the first instruction that can't be suppressed. Stop looking.
626+ if (!bptr->contains (srcOp.getDirRegRef ().regNum ))
627+ break ;
628+
618629 bool skipSetLastBits = false ;
619630 if (hasProducerConsumerDep (dst_range, src_range, allDstBits)) {
620631 break ;
621632 }
622633
623634 // at this point, we can add this DPAS into the macro
624635 ++numSuppressed;
625- bptr. addRegRanges (src_range, src_extra_range, dst_range);
636+ bptr-> addRegRanges (src_range, src_extra_range, dst_range);
626637 if (!skipSetLastBits) {
627638 allSrcNoLastBits = allSrcBits;
628639 allDstNoLastBits = allDstBits;
@@ -639,15 +650,76 @@ size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
639650 if (numSuppressed) {
640651 // At least one instruction can be suppressed, so the candidate block can be in
641652 // the macro. Update the register footprint into DepSet.
642- updateRegFootprintsToDepSets (bptr. allSrcRange , bptr. allExtraSrcRange ,
643- bptr. allDstRange );
653+ updateRegFootprintsToDepSets (bptr-> allSrcRange , bptr-> allExtraSrcRange ,
654+ bptr-> allDstRange );
644655
645656 // return the total number of instructions found that can be in the macro
646- return bptr. size () + numSuppressed;
657+ return bptr-> size () + numSuppressed;
647658 }
648659 return 0 ;
649660}
650661
// Tries to build the initial suppression block for source operand `srcIdx`,
// scanning forward from `startIt` for at most `maxGroupNum` instructions.
//
// Parameters:
//   startIt          - first instruction to consider for the block.
//   srcIdx           - source operand index; asserted to be 1 or 2.
//   allDstBits, allSrcBits
//                    - passed to setDstSrcBits for every instruction accepted
//                      into the block; allDstBits is also the set checked by
//                      hasProducerConsumerDep.
//   allDstNoLastBits, allSrcNoLastBits
//                    - overwritten with the current allDstBits/allSrcBits
//                      right before each accepted instruction is recorded.
//   forceGroupNum    - when negative, the group count comes from
//                      getNumberOfSuppresionGroups(srcIdx); otherwise this
//                      value is used as the group count.
//
// Returns the populated block, or nullptr when the group count is zero or any
// scanned instruction fails one of the checks in the loop.
//
// NOTE(review): the BitSet reference parameters may already have been updated
// by earlier loop iterations when a later iteration returns nullptr.
662+ DepSetBuilder::DpasMacroBuilder::SuppressBlockPtrTy
663+ DepSetBuilder::DpasMacroBuilder::getSuppressionBlockCandidate (
664+ InstListIterator startIt, uint32_t srcIdx, BitSet<> &allDstBits,
665+ BitSet<> &allSrcBits, BitSet<> &allDstNoLastBits,
666+ BitSet<> &allSrcNoLastBits, int forceGroupNum) const {
667+ assert (srcIdx == 1 || srcIdx == 2 );
668+ size_t maxGroupNum =
669+ forceGroupNum < 0 ? getNumberOfSuppresionGroups (srcIdx) : forceGroupNum;
670+ // Return null if the given src can't be suppressed (group count is zero).
671+ if (!maxGroupNum)
672+ return nullptr ;
673+
674+ SuppressBlockPtrTy sb (new SuppressBlock (maxGroupNum, srcIdx == 1 ? 8 : 4 )); // NOTE(review): 8 for src1, 4 for src2 -- presumably the per-group register count; confirm against SuppressBlock
675+ // Starting from startIt, check whether the following dpas sequence can form
676+ // a suppression block. At most maxGroupNum instructions are examined to find
677+ // the first block whose registers can potentially be suppressed.
678+ InstListIterator it = startIt;
679+ for (size_t i = 0 ; i < maxGroupNum; ++i) {
680+ InstListIterator nextIt = it;
681+ ++nextIt;
682+ // If there is no next instruction, or the next instruction is not a macro
683+ // candidate, a suppression block cannot be formed: return nullptr directly.
684+ if (nextIt == m_instList.end ())
685+ return nullptr ;
686+ if (nextIsNotMacroCandidate (**it, **nextIt))
687+ return nullptr ;
688+ if (!srcIsSuppressCandidate (**it, srcIdx))
689+ return nullptr ;
690+ SrcRegRangeType src_range, src_extra_range;
691+ DstRegRangeType dst_range;
692+ m_inps.getDpasSrcDependency (**it, src_range, src_extra_range, m_model);
693+ m_inps.getDpasDstDependency (**it, dst_range);
694+ if (hasInternalDep (**it, dst_range, src_range,
695+ GetDpasSystolicDepth ((*it)->getDpasFc ()) == 8 ))
696+ return nullptr ;
697+
698+ bool skipSetLastBits = false ; // NOTE(review): never set to true in this function, so the snapshot below always runs
699+ if (hasProducerConsumerDep (dst_range, src_range, allDstBits)) {
700+ return nullptr ;
701+ }
702+ uint16_t reg = (*it)->getSource (srcIdx).getDirRegRef ().regNum ;
703+ if (sb->partialOverlapped (reg))
704+ return nullptr ;
705+
706+ // Found the first register already contained in the block: the block is formed.
707+ if (sb->contains (reg))
708+ break ;
709+ sb->addRegs (reg);
710+ sb->addRegRanges (src_range, src_extra_range, dst_range);
711+ if (!skipSetLastBits) {
712+ allSrcNoLastBits = allSrcBits;
713+ allDstNoLastBits = allDstBits;
714+ }
715+ setDstSrcBits (src_range, dst_range, allSrcBits, allDstBits);
716+ ++it;
717+ }
718+
719+ assert (sb->size ());
720+ return sb;
721+ }
722+
651723bool DepSetBuilder::DpasMacroBuilder::srcIsSuppressCandidate (
652724 const Instruction &inst, uint32_t srcIdx) const {
653725 // src1 always can be the candidate since all dpas depth must be the same
0 commit comments