| 
 | 1 | +//=== SYCLOptimizeBackToBackBarrier.cpp - SYCL barrier optimization pass ===//  | 
 | 2 | +//  | 
 | 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  | 
 | 4 | +// See https://llvm.org/LICENSE.txt for license information.  | 
 | 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception  | 
 | 6 | +//  | 
 | 7 | +//===----------------------------------------------------------------------===//  | 
 | 8 | +//  | 
 | 9 | +// This pass cleans up back-to-back ControlBarrier calls.  | 
 | 10 | +//  | 
 | 11 | +//===----------------------------------------------------------------------===//  | 
 | 12 | + | 
 | 13 | +#include "llvm/SYCLLowerIR/SYCLOptimizeBackToBackBarrier.h"  | 
 | 14 | + | 
 | 15 | +#include "llvm/IR/IRBuilder.h"  | 
 | 16 | + | 
 | 17 | +using namespace llvm;  | 
 | 18 | + | 
 | 19 | +namespace {  | 
 | 20 | + | 
 | 21 | +static constexpr char CONTROL_BARRIER[] = "_Z22__spirv_ControlBarrieriii";  | 
 | 22 | +static constexpr char ITT_BARRIER[] = "__itt_offload_wg_barrier_wrapper";  | 
 | 23 | +static constexpr char ITT_RESUME[] = "__itt_offload_wi_resume_wrapper";  | 
 | 24 | + | 
 | 25 | +// Known scopes in SPIR-V.  | 
 | 26 | +enum class Scope {  | 
 | 27 | +  CrossDevice = 0,  | 
 | 28 | +  Device = 1,  | 
 | 29 | +  Workgroup = 2,  | 
 | 30 | +  Subgroup = 3,  | 
 | 31 | +  Invocation = 4  | 
 | 32 | +};  | 
 | 33 | + | 
 | 34 | +enum class CompareRes { BIGGER = 0, SMALLER = 1, EQUAL = 2, UNKNOWN = 3 };  | 
 | 35 | + | 
 | 36 | +// This map is added in case of any future scopes are added to SPIR-V and/or  | 
 | 37 | +// SYCL.  | 
 | 38 | +const std::unordered_map<uint64_t, uint64_t> ScopeWeights = {  | 
 | 39 | +    {static_cast<uint64_t>(Scope::CrossDevice), 1000},  | 
 | 40 | +    {static_cast<uint64_t>(Scope::Device), 800},  | 
 | 41 | +    {static_cast<uint64_t>(Scope::Workgroup), 600},  | 
 | 42 | +    {static_cast<uint64_t>(Scope::Subgroup), 400},  | 
 | 43 | +    {static_cast<uint64_t>(Scope::Invocation), 10}};  | 
 | 44 | + | 
 | 45 | +inline CompareRes compareScopesWithWeights(const uint64_t LHS,  | 
 | 46 | +                                           const uint64_t RHS) {  | 
 | 47 | +  auto LHSIt = ScopeWeights.find(LHS);  | 
 | 48 | +  auto RHSIt = ScopeWeights.find(RHS);  | 
 | 49 | + | 
 | 50 | +  if (LHSIt == ScopeWeights.end() || RHSIt == ScopeWeights.end())  | 
 | 51 | +    return CompareRes::UNKNOWN;  | 
 | 52 | + | 
 | 53 | +  const uint64_t LHSWeight = LHSIt->second;  | 
 | 54 | +  const uint64_t RHSWeight = RHSIt->second;  | 
 | 55 | + | 
 | 56 | +  if (LHSWeight > RHSWeight)  | 
 | 57 | +    return CompareRes::BIGGER;  | 
 | 58 | +  if (LHSWeight < RHSWeight)  | 
 | 59 | +    return CompareRes::SMALLER;  | 
 | 60 | +  return CompareRes::EQUAL;  | 
 | 61 | +}  | 
 | 62 | + | 
 | 63 | +// The function removes back-to-back ControlBarrier calls in case if they  | 
 | 64 | +// have the same memory scope and memory semantics arguments. When two  | 
 | 65 | +// back-to-back ControlBarriers are having different execution scope arguments -  | 
 | 66 | +// pick the one with the 'bigger' scope.  | 
 | 67 | +// It also cleans up ITT annotations surrounding the removed barrier call.  | 
 | 68 | +bool processControlBarrier(Function *F) {  | 
 | 69 | +  BasicBlock *PrevBB = nullptr;  | 
 | 70 | +  llvm::SmallPtrSet<Instruction *, 8> ToErase;  | 
 | 71 | +  for (auto I = F->user_begin(), E = F->user_end(); I != E;) {  | 
 | 72 | +    User *U = *I++;  | 
 | 73 | +    auto *CI = dyn_cast<CallInst>(U);  | 
 | 74 | +    if (!CI)  | 
 | 75 | +      continue;  | 
 | 76 | + | 
 | 77 | +    // New basic block - new processing.  | 
 | 78 | +    BasicBlock *CurrentBB = CI->getParent();  | 
 | 79 | +    if (CurrentBB != PrevBB) {  | 
 | 80 | +      PrevBB = CurrentBB;  | 
 | 81 | +      continue;  | 
 | 82 | +    }  | 
 | 83 | + | 
 | 84 | +    llvm::SmallPtrSet<Instruction *, 2> ToEraseLocalITT;  | 
 | 85 | +    BasicBlock::iterator It(CI);  | 
 | 86 | +    // Iterate over the basic block storing back-to-back barriers and their ITT  | 
 | 87 | +    // annotations into ToErase container.  | 
 | 88 | +    while (It != CurrentBB->begin()) {  | 
 | 89 | +      --It;  | 
 | 90 | +      auto *Cand = dyn_cast<CallInst>(&*It);  | 
 | 91 | +      if (!Cand)  | 
 | 92 | +        break;  | 
 | 93 | +      CallInst *CIToRemove = Cand;  | 
 | 94 | +      StringRef CandName = Cand->getCalledFunction()->getName();  | 
 | 95 | +      if (CandName == ITT_RESUME || CandName == ITT_BARRIER) {  | 
 | 96 | +        ToEraseLocalITT.insert(Cand);  | 
 | 97 | +        continue;  | 
 | 98 | +      } else if (CandName == CONTROL_BARRIER) {  | 
 | 99 | +        bool EqualOps = true;  | 
 | 100 | +        const auto *ExecutionScopeCI = CI->getOperand(0);  | 
 | 101 | +        const auto *ExecutionScopeCand = Cand->getOperand(0);  | 
 | 102 | +        if (ExecutionScopeCI != ExecutionScopeCand) {  | 
 | 103 | +          if (isa<ConstantInt>(ExecutionScopeCI) &&  | 
 | 104 | +              isa<ConstantInt>(ExecutionScopeCand)) {  | 
 | 105 | +            const auto ConstScopeCI =  | 
 | 106 | +                cast<ConstantInt>(ExecutionScopeCI)->getZExtValue();  | 
 | 107 | +            const auto ConstScopeCand =  | 
 | 108 | +                cast<ConstantInt>(ExecutionScopeCand)->getZExtValue();  | 
 | 109 | +            // Pick ControlBarrier with the 'bigger' execution scope.  | 
 | 110 | +            const auto Compare =  | 
 | 111 | +                compareScopesWithWeights(ConstScopeCI, ConstScopeCand);  | 
 | 112 | +            if (Compare == CompareRes::SMALLER)  | 
 | 113 | +              CIToRemove = CI;  | 
 | 114 | +            else if (Compare == CompareRes::UNKNOWN)  | 
 | 115 | +              // Unknown scopes = unknown rules. Keep ControlBarrier call.  | 
 | 116 | +              EqualOps = false;  | 
 | 117 | +          } else  | 
 | 118 | +            EqualOps = false;  | 
 | 119 | +        }  | 
 | 120 | +        // TODO: may be handle a case with not-matching memory scope and  | 
 | 121 | +        // memory semantic arguments in a smart way.  | 
 | 122 | +        for (unsigned I = 1; I != CI->getNumOperands(); ++I) {  | 
 | 123 | +          if (CI->getOperand(I) != Cand->getOperand(I)) {  | 
 | 124 | +            EqualOps = false;  | 
 | 125 | +            break;  | 
 | 126 | +          }  | 
 | 127 | +        }  | 
 | 128 | +        if (EqualOps) {  | 
 | 129 | +          ToErase.insert(CIToRemove);  | 
 | 130 | +          for (auto *ITT : ToEraseLocalITT)  | 
 | 131 | +            ToErase.insert(ITT);  | 
 | 132 | +          ToEraseLocalITT.clear();  | 
 | 133 | +        }  | 
 | 134 | +      }  | 
 | 135 | +    }  | 
 | 136 | +  }  | 
 | 137 | + | 
 | 138 | +  if (ToErase.empty())  | 
 | 139 | +    return false;  | 
 | 140 | + | 
 | 141 | +  for (auto *I : ToErase) {  | 
 | 142 | +    I->dropAllReferences();  | 
 | 143 | +    I->eraseFromParent();  | 
 | 144 | +  }  | 
 | 145 | + | 
 | 146 | +  return true;  | 
 | 147 | +}  | 
 | 148 | + | 
 | 149 | +} // namespace  | 
 | 150 | + | 
 | 151 | +PreservedAnalyses  | 
 | 152 | +SYCLOptimizeBackToBackBarrierPass::run(Module &M, ModuleAnalysisManager &MAM) {  | 
 | 153 | +  bool ModuleChanged = false;  | 
 | 154 | +  for (Function &F : M)  | 
 | 155 | +    if (F.isDeclaration())  | 
 | 156 | +      if (F.getName() == CONTROL_BARRIER)  | 
 | 157 | +        ModuleChanged |= processControlBarrier(&F);  | 
 | 158 | + | 
 | 159 | +  return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();  | 
 | 160 | +}  | 
0 commit comments