@@ -2451,6 +2451,179 @@ void CustomSafeOptPass::visitExtractElementInst(ExtractElementInst& I)
24512451    dp4WithIdentityMatrix (I);
24522452}
24532453
2454+ // /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2455+ //  This pass removes dead local memory loads and stores. If we remove all such loads and stores, we also
2456+ //  remove all local memory fences together with barriers that follow.
2457+ // 
2458+ IGC_INITIALIZE_PASS_BEGIN (TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" " TrivialLocalMemoryOpsElimination" false , false )
2459+ IGC_INITIALIZE_PASS_END(TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" " TrivialLocalMemoryOpsElimination" false , false )
2460+ 
2461+ char TrivialLocalMemoryOpsElimination::ID = 0;
2462+ 
2463+ TrivialLocalMemoryOpsElimination::TrivialLocalMemoryOpsElimination () : FunctionPass(ID)
2464+ {
2465+     initializeTrivialLocalMemoryOpsEliminationPass (*PassRegistry::getPassRegistry ());
2466+ }
2467+ 
2468+ bool  TrivialLocalMemoryOpsElimination::runOnFunction (Function& F)
2469+ {
2470+     bool  change = false ;
2471+ 
2472+     IGCMD::MetaDataUtils* pMdUtil = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils ();
2473+     if  (!isEntryFunc (pMdUtil, &F))
2474+     {
2475+         //  Skip if it is non-entry function.  For example, a subroutine
2476+         //    foo ( local int* p) { ...... store v, p; ......}
2477+         //  in which no localMemoptimization will be performed.
2478+         return  change;
2479+     }
2480+ 
2481+     visit (F);
2482+     if  (!abortPass && (m_LocalLoadsToRemove.empty () ^ m_LocalStoresToRemove.empty ()))
2483+     {
2484+         for  (StoreInst* Inst : m_LocalStoresToRemove)
2485+         {
2486+             Inst->eraseFromParent ();
2487+             change = true ;
2488+         }
2489+ 
2490+         for  (LoadInst* Inst : m_LocalLoadsToRemove)
2491+         {
2492+             if  (Inst->use_empty ())
2493+             {
2494+                 Inst->eraseFromParent ();
2495+                 change = true ;
2496+             }
2497+         }
2498+ 
2499+         for  (CallInst* Inst : m_LocalFencesBariersToRemove)
2500+         {
2501+             Inst->eraseFromParent ();
2502+             change = true ;
2503+         }
2504+     }
2505+     m_LocalStoresToRemove.clear ();
2506+     m_LocalLoadsToRemove.clear ();
2507+     m_LocalFencesBariersToRemove.clear ();
2508+ 
2509+     return  change;
2510+ }
2511+ 
2512+ /* 
2513+ OCL instruction barrier(CLK_LOCAL_MEM_FENCE); is translate to two instructions 
2514+ call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) 
2515+ call void @llvm.genx.GenISA.threadgroupbarrier() 
2516+ 
2517+ if we remove call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) 
2518+ we must remove next instruction if it is call void @llvm.genx.GenISA.threadgroupbarrier() 
2519+ */ 
2520+ void  TrivialLocalMemoryOpsElimination::findNextThreadGroupBarrierInst (Instruction& I)
2521+ {
2522+     auto  nextInst = I.getNextNonDebugInstruction ();
2523+     if  (isa<GenIntrinsicInst>(nextInst))
2524+     {
2525+         GenIntrinsicInst* II = cast<GenIntrinsicInst>(nextInst);
2526+         if  (II->getIntrinsicID () == GenISAIntrinsic::GenISA_threadgroupbarrier)
2527+         {
2528+             m_LocalFencesBariersToRemove.push_back (dyn_cast<CallInst>(nextInst));
2529+         }
2530+     }
2531+ }
2532+ 
2533+ void  TrivialLocalMemoryOpsElimination::visitLoadInst (LoadInst& I)
2534+ {
2535+     if  (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2536+     {
2537+         m_LocalLoadsToRemove.push_back (&I);
2538+     }
2539+     else  if  (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2540+     {
2541+         abortPass = true ;
2542+     }
2543+ }
2544+ 
2545+ void  TrivialLocalMemoryOpsElimination::visitStoreInst (StoreInst& I)
2546+ {
2547+     if  (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2548+     {
2549+         if  (auto  *GV = dyn_cast<GlobalVariable>(I.getPointerOperand ()->stripPointerCasts ()))
2550+         {
2551+             //  Device sanitizer instrumentation pass inserts a new local memory
2552+             //  variable and inserts store to the variable in a kernel. The
2553+             //  variable is loaded later in no-inline functions. For this case,
2554+             //  do not eliminate the store.
2555+             if  (GV->getName ().startswith (" __Asan" 
2556+             {
2557+                 return ;
2558+             }
2559+         }
2560+         m_LocalStoresToRemove.push_back (&I);
2561+     }
2562+     else  if  (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2563+     {
2564+         abortPass = true ;
2565+     }
2566+ }
2567+ 
2568+ bool  TrivialLocalMemoryOpsElimination::isLocalBarrier (CallInst& I)
2569+ {
2570+     // check arguments in call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) if match to
2571+     //  (i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) it is local barrier
2572+     std::vector<bool > argumentsOfMemoryBarrier;
2573+ 
2574+     for  (auto  arg = I.arg_begin (); arg != I.arg_end (); ++arg)
2575+     {
2576+         ConstantInt* ci = dyn_cast<ConstantInt>(arg);
2577+         if  (ci) {
2578+             argumentsOfMemoryBarrier.push_back (ci->getValue ().getBoolValue ());
2579+         }
2580+         else  {
2581+             //  argument is not a constant, so we can't tell.
2582+             return  false ;
2583+         }
2584+     }
2585+ 
2586+     return  argumentsOfMemoryBarrier == m_argumentsOfLocalMemoryBarrier;
2587+ }
2588+ 
2589+ //  If any call instruction use pointer to local memory abort pass execution
2590+ void  TrivialLocalMemoryOpsElimination::anyCallInstUseLocalMemory (CallInst& I)
2591+ {
2592+     Function* fn = I.getCalledFunction ();
2593+ 
2594+     if  (fn != NULL )
2595+     {
2596+         for  (auto  arg = fn->arg_begin (); arg != fn->arg_end (); ++arg)
2597+         {
2598+             if  (arg->getType ()->isPointerTy ())
2599+             {
2600+                 if  (arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_LOCAL || arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_GENERIC) abortPass = true ;
2601+             }
2602+         }
2603+     }
2604+ }
2605+ 
2606+ void  TrivialLocalMemoryOpsElimination::visitCallInst (CallInst& I)
2607+ {
2608+     //  detect only: llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2609+     //  (note: the first and last arguments are true)
2610+     //  and add them with immediately following barriers to m_LocalFencesBariersToRemove
2611+     anyCallInstUseLocalMemory (I);
2612+ 
2613+     if  (isa<GenIntrinsicInst>(I))
2614+     {
2615+         GenIntrinsicInst* II = cast<GenIntrinsicInst>(&I);
2616+         if  (II->getIntrinsicID () == GenISAIntrinsic::GenISA_memoryfence)
2617+         {
2618+             if  (isLocalBarrier (I))
2619+             {
2620+                 m_LocalFencesBariersToRemove.push_back (&I);
2621+                 findNextThreadGroupBarrierInst (I);
2622+             }
2623+         }
2624+     }
2625+  }
2626+ 
24542627// //////////////////////////////////////////////////////////////////////////////
24552628IGC_INITIALIZE_PASS_BEGIN (TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" " TrivialUnnecessaryTGMFenceElimination" false , false )
24562629IGC_INITIALIZE_PASS_END(TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" " TrivialUnnecessaryTGMFenceElimination" false , false )
0 commit comments