@@ -788,13 +788,35 @@ void CISA_IR_Builder::LinkTimeOptimization(
788788
789789 };
790790
791+ auto getBeginIt = [&](std::list<vISA::G4_INST*>::iterator it)
792+ {
793+ // Trace backward until it reaches an update for SP
794+ // This is where we start to push spilled arguments onto stack
795+ auto beginIt = it;
796+ for (; beginIt != callerInsts.begin (); --beginIt)
797+ {
798+ G4_INST *inst = *beginIt;
799+ for (int i = 0 , numSrc = inst->getNumSrc (); i < numSrc; ++i)
800+ {
801+ G4_Declare* rootDcl = getRootDeclare (inst->getSrc (i));
802+ if (!rootDcl) continue ;
803+ G4_Operand *dst = inst->getDst ();
804+ if (rootDcl == callerBuilder->getFE_SP ())
805+ {
806+ // the dst is updating SP
807+ if (dst->getTopDcl () == callerBuilder->getFE_SP ())
808+ return beginIt;
809+ }
810+ }
811+ }
812+ return it;
813+ };
814+
791815 // A list of store in order to perform store-to-load forwarding
792816 std::list<std::list<vISA::G4_INST*>::iterator> storeList;
793817
794- // SP will be updated twice if it has arguments storing on stack
795- // The first update is for storing private variables
796- int updateCountSP = 0 ;
797- for (auto callerIt = callerInsts.begin (); callerIt != it; callerIt ++)
818+ auto beginIt = getBeginIt (it);
819+ for (auto callerIt = beginIt; callerIt != it; callerIt ++)
798820 {
799821 G4_INST *inst = *callerIt;
800822 for (int i = 0 , numSrc = inst->getNumSrc (); i < numSrc; ++i)
@@ -806,17 +828,15 @@ void CISA_IR_Builder::LinkTimeOptimization(
806828 {
807829 stackPointers[dst->getTopDcl ()] = getPointerOffset (inst, stackPointers[rootDcl]);
808830 defInst[dst->getTopDcl ()] = callerIt;
809- bool removeFrameCacl = (removeStackFrame && updateCountSP == 2 );
810- std::string prefix = removeFrameCacl ? " removeFrame " : " " ;
831+ // beginIt is the update of SP before pushing arguments onto stack
832+ // We do not remove it immediately since we don't know whether all S2L can be performed at this stage
833+ std::string prefix = (removeStackFrame && callerIt != beginIt) ? " removeFrame " : " " ;
811834 DEBUG_PRINT (prefix << " (" << stackPointers[dst->getTopDcl ()] << " ) " );
812835 DEBUG_UTIL (inst->dump ());
813- if (removeFrameCacl )
836+ if (removeStackFrame && callerIt != beginIt )
814837 {
815838 callerInsts.erase (callerIt);
816839 }
817- // the dst is updating SP
818- if (dst->getTopDcl () == callerBuilder->getFE_SP ())
819- updateCountSP++;
820840 }
821841 else if (stackPointers.find (rootDcl) != stackPointers.end ())
822842 {
@@ -842,8 +862,7 @@ void CISA_IR_Builder::LinkTimeOptimization(
842862 {
843863 assert (i == 0 );
844864 // Start adding argument stores to the list
845- if (updateCountSP == 2 )
846- storeList.push_back (callerIt);
865+ storeList.push_back (callerIt);
847866 DEBUG_PRINT (" [ ]" );
848867 DEBUG_UTIL (inst->dump ());
849868 }
@@ -854,7 +873,6 @@ void CISA_IR_Builder::LinkTimeOptimization(
854873 }
855874 }
856875 }
857- assert (updateCountSP <= 2 && " SP has been updated more than twice from the caller" );
858876 // passing SP offset from caller to callee
859877 stackPointers[calleeBuilder->getFE_SP ()] = stackPointers[callerBuilder->getFE_SP ()];
860878 stackPointers[calleeBuilder->getFE_FP ()] = stackPointers[callerBuilder->getFE_FP ()];
0 commit comments