@@ -399,141 +399,49 @@ int CISA_IR_Builder::AddFunction(VISAFunction *& function, const char* functionN
399399// default size of the physical reg pool mem manager in bytes
400400#define PHY_REG_MEM_SIZE (16 *1024 )
401401
402- struct FCallState
403- {
404- G4_INST* fcallInst;
405- G4_Operand* opnd0;
406- G4_Operand* opnd1;
407- G4_BB* retBlock;
408- unsigned int execSize;
409- };
410-
411- struct SavedFCallStates
412- {
413- std::vector<std::pair<G4_Kernel*, FCallState>> states;
414- std::vector<G4_BB*> retbbs;
415- };
416-
417- void saveFCallState (G4_Kernel* kernel, SavedFCallStates& savedFCallState)
418- {
419- // Iterate over all BBs in kernel.
420- // For each fcall seen, store its opnd0, opnd1, retBlock.
421- // so that after compiling the copy of function for 1 kernel,
422- // the IR can be reused for another kernel rather than
423- // recompiling.
424- // kernel points to a stackcall function.
425- std::set<G4_BB*> calledFrets;
426- for (auto curBB : kernel->fg )
427- {
428- if ( curBB->size () > 0 && curBB->isEndWithFCall () )
429- {
430- // Save state for this fcall
431- G4_INST* fcallInst = curBB->back ();
432-
433- FCallState currFCallState;
434-
435- currFCallState.fcallInst = fcallInst;
436- currFCallState.opnd0 = fcallInst->getSrc (0 );
437- currFCallState.opnd1 = fcallInst->getSrc (1 );
438- currFCallState.retBlock = curBB->Succs .front ();
439- currFCallState.execSize = fcallInst->getExecSize ();
440-
441- savedFCallState.states .push_back ( std::make_pair ( kernel, currFCallState ) );
442- calledFrets.insert (currFCallState.retBlock );
443- }
444- if (curBB->size () > 0 && curBB->isEndWithFRet () && !calledFrets.count (curBB))
445- {
446- savedFCallState.retbbs .push_back (curBB);
447- }
448- }
449- }
450-
451- void restoreFCallState (G4_Kernel* kernel, SavedFCallStates savedFCallState)
402+ void restoreFCallState (G4_Kernel* kernel, const std::map<G4_BB*, G4_INST*>& savedFCallState)
452403{
453404 // Iterate over all BBs in kernel and fix all fcalls converted
454405 // to calls by reconverting them to fcall. This is required
455406 // because we want to reuse IR of function for next kernel.
456407
457- // start, end iterators denote boundaries in vector that correspond
458- // to current kernel. This assumes that entries for different
459- // functions are not interspersed.
460- auto start = savedFCallState.states .begin (), end = savedFCallState.states .end ();
461-
462- for ( BB_LIST_ITER bb_it = kernel->fg .begin ();
463- bb_it != kernel->fg .end ();
464- bb_it++ )
408+ for (auto && iter : savedFCallState)
465409 {
466- G4_BB* curBB = (*bb_it);
467-
468- if ( curBB->size () > 0 &&
469- curBB->back ()->isCall () )
410+ auto curBB = iter.first ;
411+ curBB->pop_back ();
412+ auto origInst = iter.second ;
413+ assert (origInst->isFCall () || origInst->isFReturn ());
414+ curBB->push_back (origInst);
415+ if (origInst->isFCall () && !origInst->asCFInst ()->isIndirectCall ())
470416 {
471- // Check whether this call is a convert from fcall
472- for ( auto state_it = start;
473- state_it != end;
474- state_it++ )
417+ // curBB must have a physical successor as we don't allow calls that do not return
418+ G4_BB* retBlock = curBB->getPhysicalSucc ();
419+ G4_BB* retbbToConvert = retBlock->Preds .back ();
420+ kernel->fg .removePredSuccEdges (retbbToConvert, retBlock);
421+ // Remove edge between call and previously joined function
422+ while (curBB->Succs .size () > 0 )
475423 {
476- if ( (*state_it).second .fcallInst == curBB->back () )
477- {
478- // Found a call to replace with fcall and ret with fret
479-
480- // Restore corresponding ret to fret
481- G4_BB* retBlock = (*state_it).second .retBlock ;
482-
483- G4_BB* retbbToConvert = retBlock->Preds .back ();
484-
485- G4_INST* retToReplace = retbbToConvert->back ();
486-
487- retToReplace->asCFInst ()->retToFRet ();
488- retToReplace->setDest (NULL );
489-
490- kernel->fg .removePredSuccEdges (retbbToConvert, retBlock);
491-
492- // Now restore call operands
493- G4_INST* instToReplace = curBB->back ();
494-
495- auto & state = (*state_it).second ;
496- instToReplace->setSrc (state.opnd0 , 0 );
497- instToReplace->setSrc (state.opnd1 , 1 );
498- instToReplace->setExecSize ((unsigned char )state.execSize );
499-
500- // Remove edge between call and previously joined function
501- while ( curBB->Succs .size () > 0 )
502- {
503- kernel->fg .removePredSuccEdges ( curBB, curBB->Succs .front () );
504- }
505-
506- // Restore edge to retBlock
507- kernel->fg .addPredSuccEdges ( curBB, (*state_it).second .retBlock );
508-
509- instToReplace->asCFInst ()->callToFCall ();
510- }
424+ kernel->fg .removePredSuccEdges (curBB, curBB->Succs .front ());
511425 }
512- }
513- }
514-
515- for (G4_BB* retBB : savedFCallState.retbbs )
516- {
517- G4_INST* retToReplace = retBB->back ();
518-
519- retToReplace->asCFInst ()->retToFRet ();
520- retToReplace->setDest (NULL );
521426
427+ // Restore edge to retBlock
428+ kernel->fg .addPredSuccEdges (curBB, retBlock);
429+ }
522430 }
523431
524432 // Remove all in-edges to stack call function. These may have been added
525433 // to connect earlier kernels with the function.
526- while ( kernel->fg .getEntryBB ()->Preds .size () > 0 )
434+ while ( kernel->fg .getEntryBB ()->Preds .size () > 0 )
527435 {
528- kernel->fg .removePredSuccEdges ( kernel->fg .getEntryBB ()->Preds .front (), kernel->fg .getEntryBB () );
436+ kernel->fg .removePredSuccEdges (kernel->fg .getEntryBB ()->Preds .front (), kernel->fg .getEntryBB ());
529437 }
530438}
531439
532440// Stitch the FG of subFunctions to mainFunc
533441// mainFunc could be a kernel or a non-kernel function.
534442// It also modifies pseudo_fcall/fret into call/ret opcodes.
535443// ToDo: may consider stitching only functions that may be called by this kernel/function
536- static void Stitch_Compiled_Units (G4_Kernel* mainFunc, std::map<std::string, G4_Kernel*>& subFuncs)
444+ static void Stitch_Compiled_Units (G4_Kernel* mainFunc, std::map<std::string, G4_Kernel*>& subFuncs, std::map<G4_BB*, G4_INST*>& FCallRetMap )
537445{
538446
539447 // Append subFunctions to mainFunc
@@ -549,11 +457,13 @@ static void Stitch_Compiled_Units(G4_Kernel* mainFunc, std::map<std::string, G4_
549457 }
550458
551459 mainFunc->fg .reassignBlockIDs ();
460+ mainFunc->fg .setPhysicalPredSucc (); // this is to locate the next BB after an fcall
552461
462+ auto builder = mainFunc->fg .builder ;
553463 // Change fcall/fret to call/ret and setup caller/callee edges
554464 for (G4_BB* cur : mainFunc->fg )
555465 {
556- if (cur->size () > 0 && cur-> isEndWithFCall ())
466+ if (cur->isEndWithFCall ())
557467 {
558468 // Setup successor/predecessor
559469 G4_INST* fcall = cur->back ();
@@ -565,6 +475,7 @@ static void Stitch_Compiled_Units(G4_Kernel* mainFunc, std::map<std::string, G4_
565475 if (!fcall->asCFInst ()->isIndirectCall ())
566476 {
567477 // Setup caller/callee edges for direct call
478+ // ToDo: remove this once SWSB is moved before stitching, as we would not need to maintain CFG otherwise
568479 std::string funcName = fcall->getSrc (0 )->asLabel ()->getLabel ();
569480
570481 auto iter = subFuncs.find (funcName);
@@ -585,29 +496,34 @@ static void Stitch_Compiled_Units(G4_Kernel* mainFunc, std::map<std::string, G4_
585496 G4_INST* calleeLabel = callee->fg .getEntryBB ()->front ();
586497 ASSERT_USER (calleeLabel->isLabel () == true , " Entry inst is not label" );
587498
588- // ret/e-mask
589- fcall->setSrc (fcall->getSrc (0 ), 1 );
590-
591- // dst label
592- fcall->setSrc (calleeLabel->getSrc (0 ), 0 );
593- fcall->asCFInst ()->pseudoCallToCall ();
499+ auto callInst = builder->createInternalInst (fcall->getPredicate (), G4_call, nullptr , false , fcall->getExecSize (), fcall->getDst (),
500+ calleeLabel->getSrc (0 ), fcall->getSrc (0 ), fcall->getOption ());
501+ cur->pop_back ();
502+ cur->push_back (callInst);
594503 }
595504 else
596505 {
597- fcall->setSrc (fcall->getSrc (0 ), 1 );
598- fcall->asCFInst ()->pseudoCallToCall ();
506+ // src0 is a don't-care for an indirect call as long as it's not a label
507+ auto callInst = builder->createInternalInst (fcall->getPredicate (), G4_call, nullptr , false , fcall->getExecSize (), fcall->getDst (),
508+ fcall->getSrc (0 ), fcall->getSrc (0 ), fcall->getOption ());
509+ cur->pop_back ();
510+ cur->push_back (callInst);
599511 }
512+ FCallRetMap[cur] = fcall;
600513 }
601514 }
602515
603516 // Change fret to ret
604517 for (G4_BB* cur : mainFunc->fg )
605518 {
606- if ( cur-> size () > 0 && cur->isEndWithFRet () )
519+ if ( cur->isEndWithFRet ())
607520 {
608521 G4_INST* fret = cur->back ();
609- fret->asCFInst ()->pseudoRetToRet ();
610- fret->setDest ( mainFunc->fg .builder ->createNullDst (Type_UD) );
522+ auto retInst = builder->createInternalInst (fret->getPredicate (), G4_return, nullptr , false , fret->getExecSize (), builder->createNullDst (Type_UD),
523+ fret->getSrc (0 ), fret->getSrc (1 ), fret->getOption ());
524+ cur->pop_back ();
525+ cur->push_back (retInst);
526+ FCallRetMap[cur] = fret;
611527 }
612528 }
613529
@@ -850,12 +766,6 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
850766 }
851767 }
852768
853- SavedFCallStates savedFCallState;
854- for (auto func : m_kernelsAndFunctions)
855- {
856- saveFCallState (func->getKernel (), savedFCallState);
857- }
858-
859769 // Preparing for stitching some functions to other functions
860770 // There are two stitching policies:
861771 // 1. vISA_noStitchExternFunc == false
@@ -878,7 +788,7 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
878788 continue ;
879789 } else {
880790 if (!m_options.getOption (vISA_noStitchExternFunc)) {
881- // Policy 1: all fnunctions will stitch to kernels
791+ // Policy 1: all functions will stitch to kernels
882792 subFunctions.push_back (func);
883793 subFunctionsNameMap[std::string (func->getName ())] = func->getKernel ();
884794 } else {
@@ -911,7 +821,9 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
911821 {
912822 unsigned int genxBufferSize = 0 ;
913823
914- Stitch_Compiled_Units (func->getKernel (), subFunctionsNameMap);
824+ // store the BBs with FCall and FRet, which must terminate the BB
825+ std::map<G4_BB*, G4_INST*> origFCallFRet;
826+ Stitch_Compiled_Units (func->getKernel (), subFunctionsNameMap, origFCallFRet);
915827
916828 void * genxBuffer = func->compilePostOptimize (genxBufferSize);
917829 func->setGenxBinaryBuffer (genxBuffer, genxBufferSize);
@@ -921,7 +833,7 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
921833 func->computeAndEmitDebugInfo (subFunctions);
922834 }
923835
924- restoreFCallState (func->getKernel (), savedFCallState );
836+ restoreFCallState (func->getKernel (), origFCallFRet );
925837
926838
927839 }
0 commit comments