|
| 1 | +#include "llvm/Target/TargetVerify/AMDGPUTargetVerifier.h" |
| 2 | + |
| 3 | +#include "llvm/Analysis/UniformityAnalysis.h" |
| 4 | +#include "llvm/Analysis/PostDominators.h" |
| 5 | +#include "llvm/Support/Debug.h" |
| 6 | +#include "llvm/IR/Dominators.h" |
| 7 | +#include "llvm/IR/Function.h" |
| 8 | +#include "llvm/IR/IntrinsicInst.h" |
| 9 | +#include "llvm/IR/IntrinsicsAMDGPU.h" |
| 10 | +#include "llvm/IR/Module.h" |
| 11 | +#include "llvm/IR/Value.h" |
| 12 | + |
| 13 | +#include "llvm/Support/raw_ostream.h" |
| 14 | + |
| 15 | +using namespace llvm; |
| 16 | + |
| 17 | +static cl::opt<bool> |
| 18 | +MarkUniform("mark-uniform", cl::desc("Mark instructions as uniform"), cl::init(false)); |
| 19 | + |
// Check - Assert-like helper for verifier rules. When condition C holds,
// nothing happens. Otherwise the remaining arguments are forwarded to
// TargetVerify::CheckFailed and the *enclosing function returns immediately*,
// so it may only be used inside functions returning void.
#define Check(C, ...)                                                          \
  do {                                                                         \
    if (C)                                                                     \
      break;                                                                   \
    TargetVerify::CheckFailed(__VA_ARGS__);                                    \
    return;                                                                    \
  } while (false)
| 28 | + |
| 29 | +static bool isMFMA(unsigned IID) { |
| 30 | + switch (IID) { |
| 31 | + case Intrinsic::amdgcn_mfma_f32_4x4x1f32: |
| 32 | + case Intrinsic::amdgcn_mfma_f32_4x4x4f16: |
| 33 | + case Intrinsic::amdgcn_mfma_i32_4x4x4i8: |
| 34 | + case Intrinsic::amdgcn_mfma_f32_4x4x2bf16: |
| 35 | + |
| 36 | + case Intrinsic::amdgcn_mfma_f32_16x16x1f32: |
| 37 | + case Intrinsic::amdgcn_mfma_f32_16x16x4f32: |
| 38 | + case Intrinsic::amdgcn_mfma_f32_16x16x4f16: |
| 39 | + case Intrinsic::amdgcn_mfma_f32_16x16x16f16: |
| 40 | + case Intrinsic::amdgcn_mfma_i32_16x16x4i8: |
| 41 | + case Intrinsic::amdgcn_mfma_i32_16x16x16i8: |
| 42 | + case Intrinsic::amdgcn_mfma_f32_16x16x2bf16: |
| 43 | + case Intrinsic::amdgcn_mfma_f32_16x16x8bf16: |
| 44 | + |
| 45 | + case Intrinsic::amdgcn_mfma_f32_32x32x1f32: |
| 46 | + case Intrinsic::amdgcn_mfma_f32_32x32x2f32: |
| 47 | + case Intrinsic::amdgcn_mfma_f32_32x32x4f16: |
| 48 | + case Intrinsic::amdgcn_mfma_f32_32x32x8f16: |
| 49 | + case Intrinsic::amdgcn_mfma_i32_32x32x4i8: |
| 50 | + case Intrinsic::amdgcn_mfma_i32_32x32x8i8: |
| 51 | + case Intrinsic::amdgcn_mfma_f32_32x32x2bf16: |
| 52 | + case Intrinsic::amdgcn_mfma_f32_32x32x4bf16: |
| 53 | + |
| 54 | + case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k: |
| 55 | + case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k: |
| 56 | + case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k: |
| 57 | + case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k: |
| 58 | + case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k: |
| 59 | + |
| 60 | + case Intrinsic::amdgcn_mfma_f64_16x16x4f64: |
| 61 | + case Intrinsic::amdgcn_mfma_f64_4x4x4f64: |
| 62 | + |
| 63 | + case Intrinsic::amdgcn_mfma_i32_16x16x32_i8: |
| 64 | + case Intrinsic::amdgcn_mfma_i32_32x32x16_i8: |
| 65 | + case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32: |
| 66 | + case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: |
| 67 | + |
| 68 | + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8: |
| 69 | + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8: |
| 70 | + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8: |
| 71 | + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8: |
| 72 | + |
| 73 | + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8: |
| 74 | + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8: |
| 75 | + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8: |
| 76 | + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: |
| 77 | + return true; |
| 78 | + default: |
| 79 | + return false; |
| 80 | + } |
| 81 | +} |
| 82 | + |
| 83 | +namespace llvm { |
| 84 | +class AMDGPUTargetVerify : public TargetVerify { |
| 85 | +public: |
| 86 | + Module *Mod; |
| 87 | + |
| 88 | + DominatorTree *DT; |
| 89 | + PostDominatorTree *PDT; |
| 90 | + UniformityInfo *UA; |
| 91 | + |
| 92 | + AMDGPUTargetVerify(Module *Mod, DominatorTree *DT, PostDominatorTree *PDT, UniformityInfo *UA) |
| 93 | + : TargetVerify(Mod), Mod(Mod), DT(DT), PDT(PDT), UA(UA) {} |
| 94 | + |
| 95 | + void run(Function &F); |
| 96 | +}; |
| 97 | + |
| 98 | +static bool IsValidInt(const Type *Ty) { |
| 99 | + return Ty->isIntegerTy(1) || |
| 100 | + Ty->isIntegerTy(8) || |
| 101 | + Ty->isIntegerTy(16) || |
| 102 | + Ty->isIntegerTy(32) || |
| 103 | + Ty->isIntegerTy(64) || |
| 104 | + Ty->isIntegerTy(128); |
| 105 | +} |
| 106 | + |
| 107 | +static bool isShader(CallingConv::ID CC) { |
| 108 | + switch(CC) { |
| 109 | + case CallingConv::AMDGPU_VS: |
| 110 | + case CallingConv::AMDGPU_LS: |
| 111 | + case CallingConv::AMDGPU_HS: |
| 112 | + case CallingConv::AMDGPU_ES: |
| 113 | + case CallingConv::AMDGPU_GS: |
| 114 | + case CallingConv::AMDGPU_PS: |
| 115 | + case CallingConv::AMDGPU_CS_Chain: |
| 116 | + case CallingConv::AMDGPU_CS_ChainPreserve: |
| 117 | + case CallingConv::AMDGPU_CS: |
| 118 | + return true; |
| 119 | + default: |
| 120 | + return false; |
| 121 | + } |
| 122 | +} |
| 123 | + |
| 124 | +void AMDGPUTargetVerify::run(Function &F) { |
| 125 | + // Ensure shader calling convention returns void |
| 126 | + if (isShader(F.getCallingConv())) |
| 127 | + Check(F.getReturnType() == Type::getVoidTy(F.getContext()), "Shaders must return void"); |
| 128 | + |
| 129 | + for (auto &BB : F) { |
| 130 | + |
| 131 | + for (auto &I : BB) { |
| 132 | + if (MarkUniform) |
| 133 | + outs() << UA->isUniform(&I) << ' ' << I << '\n'; |
| 134 | + |
| 135 | + // Ensure integral types are valid: i8, i16, i32, i64, i128 |
| 136 | + if (I.getType()->isIntegerTy()) |
| 137 | + Check(IsValidInt(I.getType()), "Int type is invalid.", &I); |
| 138 | + for (unsigned i = 0; i < I.getNumOperands(); ++i) |
| 139 | + if (I.getOperand(i)->getType()->isIntegerTy()) |
| 140 | + Check(IsValidInt(I.getOperand(i)->getType()), |
| 141 | + "Int type is invalid.", I.getOperand(i)); |
| 142 | + |
| 143 | + // Ensure no store to const memory |
| 144 | + if (auto *SI = dyn_cast<StoreInst>(&I)) |
| 145 | + { |
| 146 | + unsigned AS = SI->getPointerAddressSpace(); |
| 147 | + Check(AS != 4, "Write to const memory", SI); |
| 148 | + } |
| 149 | + |
| 150 | + // Ensure no kernel to kernel calls. |
| 151 | + if (auto *CI = dyn_cast<CallInst>(&I)) |
| 152 | + { |
| 153 | + CallingConv::ID CalleeCC = CI->getCallingConv(); |
| 154 | + if (CalleeCC == CallingConv::AMDGPU_KERNEL) |
| 155 | + { |
| 156 | + CallingConv::ID CallerCC = CI->getParent()->getParent()->getCallingConv(); |
| 157 | + Check(CallerCC != CallingConv::AMDGPU_KERNEL, |
| 158 | + "A kernel may not call a kernel", CI->getParent()->getParent()); |
| 159 | + } |
| 160 | + } |
| 161 | + |
| 162 | + // Ensure MFMA is not in control flow with diverging operands |
| 163 | + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { |
| 164 | + if (isMFMA(II->getIntrinsicID())) { |
| 165 | + bool InControlFlow = false; |
| 166 | + for (const auto &P : predecessors(&BB)) |
| 167 | + if (!PDT->dominates(&BB, P)) { |
| 168 | + InControlFlow = true; |
| 169 | + break; |
| 170 | + } |
| 171 | + for (const auto &S : successors(&BB)) |
| 172 | + if (!DT->dominates(&BB, S)) { |
| 173 | + InControlFlow = true; |
| 174 | + break; |
| 175 | + } |
| 176 | + if (InControlFlow) { |
| 177 | + // If operands to MFMA are not uniform, MFMA cannot be in control flow |
| 178 | + bool hasUniformOperands = true; |
| 179 | + for (unsigned i = 0; i < II->getNumOperands(); i++) { |
| 180 | + if (!UA->isUniform(II->getOperand(i))) { |
| 181 | + dbgs() << "Not uniform: " << *II->getOperand(i) << '\n'; |
| 182 | + hasUniformOperands = false; |
| 183 | + } |
| 184 | + } |
| 185 | + if (!hasUniformOperands) Check(false, "MFMA in control flow", II); |
| 186 | + //else Check(false, "MFMA in control flow (uniform operands)", II); |
| 187 | + } |
| 188 | + //else Check(false, "MFMA not in control flow", II); |
| 189 | + } |
| 190 | + } |
| 191 | + } |
| 192 | + } |
| 193 | +} |
| 194 | + |
| 195 | +PreservedAnalyses AMDGPUTargetVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 196 | + |
| 197 | + auto *Mod = F.getParent(); |
| 198 | + |
| 199 | + auto UA = &AM.getResult<UniformityInfoAnalysis>(F); |
| 200 | + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); |
| 201 | + auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F); |
| 202 | + |
| 203 | + AMDGPUTargetVerify TV(Mod, DT, PDT, UA); |
| 204 | + TV.run(F); |
| 205 | + |
| 206 | + dbgs() << TV.MessagesStr.str(); |
| 207 | + if (!TV.MessagesStr.str().empty()) { |
| 208 | + F.getParent()->IsValid = false; |
| 209 | + } |
| 210 | + |
| 211 | + return PreservedAnalyses::all(); |
| 212 | +} |
| 213 | +} // namespace llvm |
0 commit comments