|
6 | 6 |
|
7 | 7 | #include "compiler/plugins/target/ROCM/ROCMTargetUtils.h" |
8 | 8 |
|
| 9 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx1030.h" |
| 10 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx1100.h" |
| 11 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx90a.h" |
| 12 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx942.h" |
9 | 13 | #include "iree/compiler/Codegen/Utils/GPUUtils.h" |
10 | 14 | #include "iree/compiler/Dialect/HAL/Utils/LLVMLinkerUtils.h" |
11 | 15 | #include "iree/compiler/Utils/ToolUtils.h" |
| 16 | +#include "llvm/ADT/StringSwitch.h" |
12 | 17 | #include "llvm/IR/Constants.h" |
13 | 18 | #include "llvm/IR/Module.h" |
14 | 19 | #include "llvm/IRReader/IRReader.h" |
@@ -79,76 +84,28 @@ static LogicalResult linkWithBitcodeFiles(Location loc, llvm::Module *module, |
79 | 84 | } |
80 | 85 |
|
81 | 86 | static LogicalResult linkBitcodeFile(Location loc, llvm::Linker &linker, |
82 | | - unsigned linkerFlags, StringRef path, |
| 87 | + unsigned linkerFlags, StringRef filename, |
| 88 | + StringRef contents, |
83 | 89 | llvm::TargetMachine &targetMachine, |
84 | 90 | llvm::LLVMContext &context) { |
85 | | - auto bitcodeBufferRef = llvm::MemoryBuffer::getFile(path); |
86 | | - if (auto ec = bitcodeBufferRef.getError()) { |
87 | | - return mlir::emitError(loc) << "failed reading user bitcode file `" << path |
88 | | - << "`: " << ec.message(); |
89 | | - } |
| 91 | + llvm::MemoryBufferRef bitcodeBufferRef(contents, filename); |
90 | 92 | auto setAlwaysInline = [&](llvm::Module &module) { |
91 | | - if (targetMachine.getTargetCPU().contains("gfx10") || |
92 | | - targetMachine.getTargetCPU().contains("gfx11")) { |
93 | | - // Some ROCM/HIP functions for gfx10 or gfx11 has accuracy issue if |
94 | | - // inlined. |
95 | | - return; |
96 | | - } |
97 | 93 | for (auto &func : module.getFunctionList()) { |
98 | | - // Some ROCM/HIP builtin functions have Optnone and NoInline for default. |
99 | | - if (targetMachine.getTargetTriple().isAMDGCN()) { |
100 | | - if (func.hasFnAttribute(llvm::Attribute::OptimizeNone)) { |
101 | | - func.removeFnAttr(llvm::Attribute::OptimizeNone); |
102 | | - } |
103 | | - if (targetMachine.getTargetTriple().isAMDGCN() && |
104 | | - func.hasFnAttribute(llvm::Attribute::NoInline)) { |
105 | | - func.removeFnAttr(llvm::Attribute::NoInline); |
106 | | - } |
107 | | - } |
108 | 94 | func.addFnAttr(llvm::Attribute::AlwaysInline); |
109 | 95 | } |
110 | 96 | }; |
111 | | - if (failed(linkBitcodeModule( |
112 | | - loc, linker, linkerFlags, targetMachine, path, |
113 | | - llvm::parseBitcodeFile(*bitcodeBufferRef->get(), context), |
114 | | - setAlwaysInline))) { |
| 97 | + if (failed( |
| 98 | + linkBitcodeModule(loc, linker, linkerFlags, targetMachine, filename, |
| 99 | + llvm::parseBitcodeFile(bitcodeBufferRef, context), |
| 100 | + setAlwaysInline))) { |
115 | 101 | return mlir::emitError(loc) << "failed linking in user bitcode file `" |
116 | | - << path << "` for target triple '" |
| 102 | + << filename << "` for target triple '" |
117 | 103 | << targetMachine.getTargetTriple().str() << "'"; |
118 | 104 | } |
119 | 105 |
|
120 | 106 | return success(); |
121 | 107 | } |
122 | 108 |
|
123 | | -static std::vector<std::string> getUkernelPaths(StringRef enabledUkernelsStr, |
124 | | - StringRef targetChip, |
125 | | - StringRef bitcodePath) { |
126 | | - std::vector<std::string> selectedUkernelNames; |
127 | | - if (enabledUkernelsStr == "all") { |
128 | | - const char *allUkernelNames[] = {"argmax"}; |
129 | | - size_t numUkernels = sizeof(allUkernelNames) / sizeof(allUkernelNames[0]); |
130 | | - for (int i = 0; i < numUkernels; i++) { |
131 | | - selectedUkernelNames.push_back(allUkernelNames[i]); |
132 | | - } |
133 | | - } else { |
134 | | - while (!enabledUkernelsStr.empty()) { |
135 | | - auto split = enabledUkernelsStr.split(','); |
136 | | - selectedUkernelNames.push_back(split.first.str()); |
137 | | - enabledUkernelsStr = split.second; |
138 | | - } |
139 | | - } |
140 | | - |
141 | | - // Construct full path to ROCDL bitcode libraries. |
142 | | - std::vector<std::string> result; |
143 | | - std::string app = "/"; |
144 | | - for (auto &kernelName : selectedUkernelNames) { |
145 | | - std::string filename = |
146 | | - "rocm_" + kernelName + "_ukernel_" + targetChip.str(); |
147 | | - result.push_back(bitcodePath.str() + app + filename + ".bc"); |
148 | | - } |
149 | | - return result; |
150 | | -} |
151 | | - |
152 | 109 | static void overridePlatformGlobal(llvm::Module *module, StringRef globalName, |
153 | 110 | uint32_t newValue, llvm::Type *globalTy) { |
154 | 111 | // NOTE: the global will not be defined if it is not used in the module. |
@@ -228,24 +185,36 @@ LogicalResult linkHIPBitcodeIfNeeded(Location loc, llvm::Module *module, |
228 | 185 | return linkWithBitcodeFiles(loc, module, bitcodePaths); |
229 | 186 | } |
230 | 187 |
|
| 188 | +static std::tuple<const iree_file_toc_t *, int> |
| 189 | +getUkernelBitcodeTOC(StringRef gpuArch) { |
| 190 | + return llvm::StringSwitch<std::tuple<const iree_file_toc_t *, int>>(gpuArch) |
| 191 | + .Case("gfx90a", |
| 192 | + {iree_uk_amdgpu_gfx90a_create(), iree_uk_amdgpu_gfx90a_size()}) |
| 193 | + .Case("gfx942", |
| 194 | + {iree_uk_amdgpu_gfx942_create(), iree_uk_amdgpu_gfx942_size()}) |
| 195 | + .Case("gfx1030", |
| 196 | + {iree_uk_amdgpu_gfx1030_create(), iree_uk_amdgpu_gfx1030_size()}) |
| 197 | + .Case("gfx1100", |
| 198 | + {iree_uk_amdgpu_gfx1100_create(), iree_uk_amdgpu_gfx1100_size()}) |
| 199 | + .Default({nullptr, 0}); |
| 200 | +} |
| 201 | + |
231 | 202 | // Links optimized Ukernel bitcode into the given module if the module needs it. |
232 | 203 | LogicalResult linkUkernelBitcodeFiles(Location loc, llvm::Module *module, |
233 | 204 | StringRef enabledUkernelsStr, |
234 | 205 | StringRef targetChip, |
235 | 206 | StringRef bitcodePath, |
236 | 207 | unsigned linkerFlags, |
237 | 208 | llvm::TargetMachine &targetMachine) { |
238 | | - // Early exit if Ukernel not supported on target chip. |
239 | | - if (!iree_compiler::hasUkernelSupportedRocmArch(targetChip)) { |
240 | | - return mlir::emitError(loc) |
241 | | - << "ukernel '" << enabledUkernelsStr |
242 | | - << "' not supported on target chip: " << targetChip; |
| 209 | + auto [toc, toc_size] = getUkernelBitcodeTOC(targetChip); |
| 210 | + if (!toc) { |
| 211 | + return failure(); |
243 | 212 | } |
244 | | - std::vector<std::string> ukernelPaths = |
245 | | - getUkernelPaths(enabledUkernelsStr, targetChip, bitcodePath); |
| 213 | + |
246 | 214 | llvm::Linker linker(*module); |
247 | | - for (auto &path : ukernelPaths) { |
248 | | - if (failed(linkBitcodeFile(loc, linker, linkerFlags, StringRef(path), |
| 215 | + for (int i = 0; i < toc_size; ++i) { |
| 216 | + if (failed(linkBitcodeFile(loc, linker, linkerFlags, toc[i].name, |
| 217 | + llvm::StringRef(toc[i].data, toc[i].size), |
249 | 218 | targetMachine, module->getContext()))) |
250 | 219 | return failure(); |
251 | 220 | } |
|
0 commit comments