@@ -2450,6 +2450,179 @@ void CustomSafeOptPass::visitExtractElementInst(ExtractElementInst& I)
24502450 dp4WithIdentityMatrix (I);
24512451}
24522452
2453+ // /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
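// This pass removes dead local memory loads and stores from entry functions.
// Local accesses are treated as dead when the function contains only local
// loads or only local stores (never both). When they are removed, the local
// memory fences, together with the thread-group barriers that immediately
// follow them, are removed as well.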
2456+ //
2457+ IGC_INITIALIZE_PASS_BEGIN (TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
2458+ IGC_INITIALIZE_PASS_END(TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
2459+
2460+ char TrivialLocalMemoryOpsElimination::ID = 0;
2461+
2462+ TrivialLocalMemoryOpsElimination::TrivialLocalMemoryOpsElimination () : FunctionPass(ID)
2463+ {
2464+ initializeTrivialLocalMemoryOpsEliminationPass (*PassRegistry::getPassRegistry ());
2465+ }
2466+
2467+ bool TrivialLocalMemoryOpsElimination::runOnFunction (Function& F)
2468+ {
2469+ bool change = false ;
2470+
2471+ IGCMD::MetaDataUtils* pMdUtil = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils ();
2472+ if (!isEntryFunc (pMdUtil, &F))
2473+ {
2474+ // Skip if it is non-entry function. For example, a subroutine
2475+ // foo ( local int* p) { ...... store v, p; ......}
2476+ // in which no localMemoptimization will be performed.
2477+ return change;
2478+ }
2479+
2480+ visit (F);
2481+ if (!abortPass && (m_LocalLoadsToRemove.empty () ^ m_LocalStoresToRemove.empty ()))
2482+ {
2483+ for (StoreInst* Inst : m_LocalStoresToRemove)
2484+ {
2485+ Inst->eraseFromParent ();
2486+ change = true ;
2487+ }
2488+
2489+ for (LoadInst* Inst : m_LocalLoadsToRemove)
2490+ {
2491+ if (Inst->use_empty ())
2492+ {
2493+ Inst->eraseFromParent ();
2494+ change = true ;
2495+ }
2496+ }
2497+
2498+ for (CallInst* Inst : m_LocalFencesBariersToRemove)
2499+ {
2500+ Inst->eraseFromParent ();
2501+ change = true ;
2502+ }
2503+ }
2504+ m_LocalStoresToRemove.clear ();
2505+ m_LocalLoadsToRemove.clear ();
2506+ m_LocalFencesBariersToRemove.clear ();
2507+
2508+ return change;
2509+ }
2510+
2511+ /*
2512+ OCL instruction barrier(CLK_LOCAL_MEM_FENCE); is translate to two instructions
2513+ call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2514+ call void @llvm.genx.GenISA.threadgroupbarrier()
2515+
2516+ if we remove call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2517+ we must remove next instruction if it is call void @llvm.genx.GenISA.threadgroupbarrier()
2518+ */
2519+ void TrivialLocalMemoryOpsElimination::findNextThreadGroupBarrierInst (Instruction& I)
2520+ {
2521+ auto nextInst = I.getNextNonDebugInstruction ();
2522+ if (isa<GenIntrinsicInst>(nextInst))
2523+ {
2524+ GenIntrinsicInst* II = cast<GenIntrinsicInst>(nextInst);
2525+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_threadgroupbarrier)
2526+ {
2527+ m_LocalFencesBariersToRemove.push_back (dyn_cast<CallInst>(nextInst));
2528+ }
2529+ }
2530+ }
2531+
2532+ void TrivialLocalMemoryOpsElimination::visitLoadInst (LoadInst& I)
2533+ {
2534+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2535+ {
2536+ m_LocalLoadsToRemove.push_back (&I);
2537+ }
2538+ else if (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2539+ {
2540+ abortPass = true ;
2541+ }
2542+ }
2543+
2544+ void TrivialLocalMemoryOpsElimination::visitStoreInst (StoreInst& I)
2545+ {
2546+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2547+ {
2548+ if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand ()->stripPointerCasts ()))
2549+ {
2550+ // Device sanitizer instrumentation pass inserts a new local memory
2551+ // variable and inserts store to the variable in a kernel. The
2552+ // variable is loaded later in no-inline functions. For this case,
2553+ // do not eliminate the store.
2554+ if (GV->getName ().startswith (" __Asan" ))
2555+ {
2556+ return ;
2557+ }
2558+ }
2559+ m_LocalStoresToRemove.push_back (&I);
2560+ }
2561+ else if (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2562+ {
2563+ abortPass = true ;
2564+ }
2565+ }
2566+
2567+ bool TrivialLocalMemoryOpsElimination::isLocalBarrier (CallInst& I)
2568+ {
2569+ // check arguments in call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) if match to
2570+ // (i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) it is local barrier
2571+ std::vector<bool > argumentsOfMemoryBarrier;
2572+
2573+ for (auto arg = I.arg_begin (); arg != I.arg_end (); ++arg)
2574+ {
2575+ ConstantInt* ci = dyn_cast<ConstantInt>(arg);
2576+ if (ci) {
2577+ argumentsOfMemoryBarrier.push_back (ci->getValue ().getBoolValue ());
2578+ }
2579+ else {
2580+ // argument is not a constant, so we can't tell.
2581+ return false ;
2582+ }
2583+ }
2584+
2585+ return argumentsOfMemoryBarrier == m_argumentsOfLocalMemoryBarrier;
2586+ }
2587+
2588+ // If any call instruction use pointer to local memory abort pass execution
2589+ void TrivialLocalMemoryOpsElimination::anyCallInstUseLocalMemory (CallInst& I)
2590+ {
2591+ Function* fn = I.getCalledFunction ();
2592+
2593+ if (fn != NULL )
2594+ {
2595+ for (auto arg = fn->arg_begin (); arg != fn->arg_end (); ++arg)
2596+ {
2597+ if (arg->getType ()->isPointerTy ())
2598+ {
2599+ if (arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_LOCAL || arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_GENERIC) abortPass = true ;
2600+ }
2601+ }
2602+ }
2603+ }
2604+
2605+ void TrivialLocalMemoryOpsElimination::visitCallInst (CallInst& I)
2606+ {
2607+ // detect only: llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2608+ // (note: the first and last arguments are true)
2609+ // and add them with immediately following barriers to m_LocalFencesBariersToRemove
2610+ anyCallInstUseLocalMemory (I);
2611+
2612+ if (isa<GenIntrinsicInst>(I))
2613+ {
2614+ GenIntrinsicInst* II = cast<GenIntrinsicInst>(&I);
2615+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_memoryfence)
2616+ {
2617+ if (isLocalBarrier (I))
2618+ {
2619+ m_LocalFencesBariersToRemove.push_back (&I);
2620+ findNextThreadGroupBarrierInst (I);
2621+ }
2622+ }
2623+ }
2624+ }
2625+
24532626// //////////////////////////////////////////////////////////////////////////////
24542627IGC_INITIALIZE_PASS_BEGIN (TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" , " TrivialUnnecessaryTGMFenceElimination" , false , false )
24552628IGC_INITIALIZE_PASS_END(TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" , " TrivialUnnecessaryTGMFenceElimination" , false , false )
0 commit comments