// vortex_utils.cc
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <ostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Linker/Linker.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#if LLVM_MAJOR >= 17
#include <llvm/Transforms/IPO/Internalize.h>
#endif
#include "vortex_utils.h"
#include <llvm/IR/Module.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Bitcode/BitcodeWriter.h>
#include "pocl.h"
#include "pocl_file_util.h"
#include "pocl_util.h"
#include "LLVMUtils.h"
#include "kernel_args.h"
/// Runs `cmd` through popen() and appends everything the command writes to
/// its standard output to `out`.
///
/// Only the read end of the pipe is consumed, so anything the command prints
/// to stderr is not captured here unless `cmd` itself redirects it.
///
/// @param cmd  Command line handed to popen().
/// @param out  Stream that receives the captured output.
/// @return -1 if the pipe could not be opened, otherwise the value returned
///         by pclose() for the finished command.
static int exec(const char* cmd, std::ostream& out) {
  char buffer[128];
  FILE* pipe = popen(cmd, "r");
  if (!pipe) {
    return -1;
  }
  // Drive the loop with fgets() itself: it returns nullptr on end-of-file AND
  // on a read error. Polling feof() instead would spin forever once the
  // stream is in an error state, because feof() stays false in that case.
  while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
    out << buffer;
  }
  return pclose(pipe);
}
/// Strips the trailing extension from `filename` in place by overwriting the
/// last '.' of the final path component with a NUL terminator.
///
/// A '.' that belongs to a directory component (e.g. "build.d/kernel") is not
/// an extension and is left alone; a null `filename` is ignored.
///
/// @param filename  Mutable, NUL-terminated path; modified in place.
void remove_extension(char* filename) {
  if (filename == nullptr) {
    return;
  }
  char* last_dot = strrchr(filename, '.');
  if (last_dot == nullptr) {
    return;
  }
  // If a path separator follows the dot, the dot is part of a directory name
  // and cutting there would truncate the path instead of the extension.
  if (strchr(last_dot, '/') != nullptr) {
    return;
  }
  *last_dot = '\0';
}
/// Packs `names` into one malloc()-allocated buffer in which the strings are
/// laid out back to back, each followed by its own NUL terminator.
///
/// For an empty list a one-byte buffer holding a single NUL is returned, so a
/// null result always means that the allocation really failed.
///
/// @param names  Strings to pack, in the order they should appear.
/// @return Pointer to the packed buffer (obtained from malloc()), or nullptr
///         if the allocation failed.
static char* convertToCharArray(const llvm::SmallVector<std::string, 8>& names) {
  // Total bytes needed: every name plus its terminator.
  size_t totalLength = 0;
  for (const auto& name : names) {
    totalLength += name.size() + 1;
  }

  // malloc(0) is allowed to return nullptr; always ask for at least one byte
  // so that an empty list is not misreported as an out-of-memory condition.
  char* buffer = static_cast<char*>(malloc(totalLength > 0 ? totalLength : 1));
  if (buffer == nullptr) {
    std::cerr << "Memory allocation failed" << std::endl;
    return nullptr;
  }
  buffer[0] = '\0';

  // c_str() is guaranteed to be NUL-terminated, so copying size() + 1 bytes
  // transfers the name and its terminator in one step.
  size_t position = 0;
  for (const auto& name : names) {
    std::memcpy(buffer + position, name.c_str(), name.size() + 1);
    position += name.size() + 1;
  }
  return buffer;
}
// Replaces a kernel's parameter list with a single byte pointer ("ArgBuffer").
//
// A new function with the signature `ret(i8*)` is inserted in front of
// `function`, takes over its name and receives its basic blocks. A fresh
// "entry" block is emitted that rebuilds every original parameter from the
// buffer:
//   * regular parameters are loaded from the current byte offset, which then
//     advances by the parameter's alloc size rounded up with ALIGN_OFFSET;
//   * local-memory parameters (pocl::isLocalMemFunctionArg) read a 32-bit
//     offset from the buffer and become a GEP into the block returned by
//     vx_local_alloc(). That call is emitted once, for the first such
//     parameter, after reading a 32-bit "__local_size" from the buffer.
// Every rebuilt value is tagged with "vortex.uniform" metadata and replaces
// all uses of the original parameter.
//
// The name of the new function is appended to `funcNames`. The original
// function is left in the module without a body; this routine always
// returns true.
static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module, llvm::SmallVector<std::string, 8>& funcNames) {
  auto &Ctx = module->getContext();
  const llvm::DataLayout &Layout = module->getDataLayout();

  // Buffer slots are rounded up to 8 bytes when the triple mentions riscv64,
  // and to 4 bytes otherwise.
  std::string triple = module->getTargetTriple();
  bool targets64Bit = triple.find("riscv64") != std::string::npos;
  uint32_t slotAlign = targets64Bit ? 8 : 4;

  auto Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto ByteTy = llvm::Type::getInt8Ty(Ctx);
  auto BytePtrTy = ByteTy->getPointerTo();

  // Build the replacement function right before the original and let it
  // inherit the original's name.
  auto WrapperTy = llvm::FunctionType::get(function->getReturnType(), {BytePtrTy}, false);
  auto Wrapper = llvm::Function::Create(WrapperTy, function->getLinkage(), function->getName() + "_vortex");
  module->getFunctionList().insert(function->getIterator(), Wrapper);
  Wrapper->takeName(function);

  auto Prologue = llvm::BasicBlock::Create(Ctx, "entry", Wrapper);
  llvm::IRBuilder<> IRB(Prologue);

  // The only parameter of the new function is the packed argument buffer.
  auto Packed = &*Wrapper->arg_begin();
  Packed->setName("ArgBuffer");

  auto UniformTag = llvm::MDNode::get(Ctx, llvm::MDString::get(Ctx, "vortex.uniform"));

  unsigned paramIndex = 0;
  unsigned cursor = 0;                 // current byte offset into the buffer
  llvm::Value *LocalPool = nullptr;    // result of vx_local_alloc, once emitted

  for (auto &Param : function->args()) {
    auto ParamTy = Param.getType();
    llvm::Value *Rebuilt = nullptr;

    if (!pocl::isLocalMemFunctionArg(function, paramIndex)) {
      // Plain parameter: load it straight out of the buffer.
      auto SlotOffset = llvm::ConstantInt::get(Int32Ty, cursor);
      auto SlotPtr = IRB.CreateGEP(ByteTy, Packed, SlotOffset, Param.getName() + "_offset_ptr");
      Rebuilt = IRB.CreateLoad(ParamTy, SlotPtr, Param.getName() + "_loaded");
      cursor = ALIGN_OFFSET(cursor + Layout.getTypeAllocSize(ParamTy), slotAlign);
    } else {
      if (LocalPool == nullptr) {
        // First local-memory parameter: read __local_size ...
        auto SizeOffset = llvm::ConstantInt::get(Int32Ty, cursor);
        auto SizePtr = IRB.CreateGEP(ByteTy, Packed, SizeOffset, "__local_size_ptr");
        auto SizeVal = IRB.CreateLoad(Int32Ty, SizePtr, "__local_size");
        cursor = ALIGN_OFFSET(cursor + 4, slotAlign);
        // ... and emit the call to vx_local_alloc(__local_size).
        auto AllocTy = llvm::FunctionType::get(BytePtrTy, {Int32Ty}, false);
        auto AllocFn = module->getOrInsertFunction("vx_local_alloc", AllocTy);
        LocalPool = IRB.CreateCall(AllocFn, {SizeVal}, "__local_mem");
      }
      // Read this parameter's offset from the buffer.
      auto SlotOffset = llvm::ConstantInt::get(Int32Ty, cursor);
      auto SlotPtr = IRB.CreateGEP(ByteTy, Packed, SlotOffset, Param.getName() + "_offset_ptr");
      auto PoolOffset = IRB.CreateLoad(Int32Ty, SlotPtr, Param.getName() + "_offset");
      cursor = ALIGN_OFFSET(cursor + 4, slotAlign);
      // The parameter becomes a pointer that many bytes into the pool.
      Rebuilt = IRB.CreateGEP(ByteTy, LocalPool, PoolOffset, Param.getName() + "_byte_ptr");
    }

    auto RebuiltInst = llvm::cast<llvm::Instruction>(Rebuilt);
    assert(RebuiltInst != nullptr);
    RebuiltInst->setMetadata("vortex.uniform", UniformTag);
    Param.replaceAllUsesWith(Rebuilt);
    ++paramIndex;
  }

  // Hand the original body over to the new function.
  Wrapper->splice(Wrapper->end(), function);

  // Fall through from the new entry block into the first block that came
  // from the original function.
  for (auto &Block : *Wrapper) {
    if (&Block == Prologue)
      continue;
    IRB.CreateBr(&Block);
    break;
  }

  funcNames.push_back(Wrapper->getName().str());
  return true;
}
// Runs createArgumentsBuffer() on every function of `module` for which
// pocl::isKernelToProcess() is true, collecting the names of the rewritten
// kernels in `funcNames`. The original functions are erased only after the
// walk over the module has finished.
static void processKernels(llvm::SmallVector<std::string, 8>& funcNames, llvm::Module *module) {
  llvm::SmallVector<llvm::Function *, 8> replaced;

  for (llvm::Function &candidate : module->functions()) {
    const bool rewritten = pocl::isKernelToProcess(candidate) &&
                           createArgumentsBuffer(&candidate, module, funcNames);
    if (rewritten) {
      replaced.push_back(&candidate);
    }
  }

  // Erasing is deferred so the function list is not shrunk while iterating.
  for (llvm::Function *stale : replaced) {
    stale->eraseFromParent();
  }
}
// Emits `i8* __vx_get_kernel_callback(i32 kernel_index)` into `module`.
//
// The function is a switch over `kernel_index`: case i returns the address of
// the function named funcNames[i], and any other index returns a null
// pointer.
//
// Case numbers are taken from the position inside `funcNames` (the same list
// that is exported to the host as the kernel-name table) rather than from the
// order in which functions happen to sit in the module, so the two can never
// drift apart. A name that cannot be resolved keeps its index reserved and
// falls through to the null-returning default.
static void addKernelSelect(llvm::SmallVector<std::string, 8>& funcNames, llvm::Module *module) {
  auto& Context = module->getContext();
  auto I32Ty = llvm::Type::getInt32Ty(Context);
  auto I8Ty = llvm::Type::getInt8Ty(Context);
  auto I8PtrTy = I8Ty->getPointerTo();

  auto GetKernelCallbackTy = llvm::FunctionType::get(I8PtrTy, {I32Ty}, false);
  auto GetKernelCallbackFunc = llvm::Function::Create(
      GetKernelCallbackTy, llvm::Function::ExternalLinkage, "__vx_get_kernel_callback", module);

  // The single parameter is the kernel index to look up.
  auto KernelIndex = GetKernelCallbackFunc->arg_begin();
  KernelIndex->setName("kernel_index");

  // "entry" must be created first so that it is the function's entry block.
  auto EntryBB = llvm::BasicBlock::Create(Context, "entry", GetKernelCallbackFunc);
  auto DefaultBB = llvm::BasicBlock::Create(Context, "default", GetKernelCallbackFunc);

  llvm::IRBuilder<> Builder(Context);

  // Default case: out-of-range indices yield a null pointer.
  Builder.SetInsertPoint(DefaultBB);
  Builder.CreateRet(llvm::ConstantPointerNull::get(I8PtrTy));

  // The switch is created with its real default destination right away.
  Builder.SetInsertPoint(EntryBB);
  auto Switch = Builder.CreateSwitch(KernelIndex, DefaultBB, static_cast<unsigned>(funcNames.size()));

  unsigned NextIndex = 0;
  for (const std::string& Name : funcNames) {
    const unsigned FunctionIndex = NextIndex++;
    llvm::Function* Kernel = module->getFunction(Name);
    if (Kernel == nullptr)
      continue;  // unresolved name: leave this index to the default case

    // One block per kernel that simply returns the kernel's address.
    auto CaseBB = llvm::BasicBlock::Create(Context, "case_" + std::to_string(FunctionIndex), GetKernelCallbackFunc);
    Builder.SetInsertPoint(CaseBB);
    Builder.CreateRet(Builder.CreateBitCast(Kernel, GetKernelCallbackTy->getReturnType()));
    Switch->addCase(llvm::ConstantInt::get(I32Ty, FunctionIndex), CaseBB);
  }
}
int compile_vortex_program(char**kernel_names, int* num_kernels, char* sz_program_vxbin, void* llvm_module) {
int err;
const char* llvm_install_path = getenv("LLVM_PREFIX");
if (llvm_install_path) {
if (!pocl_exists(llvm_install_path)) {
POCL_MSG_ERR("$LLVM_PREFIX: '%s' doesn't exist\n", llvm_install_path);
return -1;
}
POCL_MSG_PRINT_INFO("using $LLVM_PREFIX=%s!\n", llvm_install_path);
}
std::string build_cflags = pocl_get_string_option("POCL_VORTEX_CFLAGS", "");
if (build_cflags == "") {
POCL_MSG_ERR("'POCL_VORTEX_CFLAGS' need to be set\n");
return -1;
}
std::string build_ldflags = pocl_get_string_option ("POCL_VORTEX_LDFLAGS", "");
if(build_ldflags == ""){
POCL_MSG_ERR("'POCL_VORTEX_LDFLAGS' need to be set\n");
return -1;
}
char sz_program_bc[POCL_MAX_PATHNAME_LENGTH + 1];
err = pocl_mk_tempname(sz_program_bc, "/tmp/pocl_vortex_program", ".bc", nullptr);
if (err != 0)
return err;
char sz_program_elf[POCL_MAX_PATHNAME_LENGTH + 1];
err = pocl_mk_tempname(sz_program_elf, "/tmp/pocl_vortex_program", ".elf", nullptr);
if (err != 0)
return err;
auto module = (llvm::Module *)llvm_module;
llvm::SmallVector<std::string, 8> kernelNames;
processKernels(kernelNames, module);
addKernelSelect(kernelNames, module);
*num_kernels = kernelNames.size();
*kernel_names = convertToCharArray(kernelNames);
{
std::error_code EC;
llvm::raw_fd_ostream file(sz_program_bc, EC, llvm::sys::fs::OF_None);
llvm::WriteBitcodeToFile(*module, file);
file.close();
}
if (POCL_DEBUGGING_ON) {
std::error_code EC;
llvm::raw_fd_ostream file("program.ll", EC, llvm::sys::fs::OF_None);
module->print(file, nullptr);
file.close();
}
{
std::string clang_path(CLANG);
if (llvm_install_path) {
clang_path.replace(0, strlen(LLVM_PREFIX), llvm_install_path);
}
char sz_kernel_main[POCL_MAX_PATHNAME_LENGTH];
pocl_get_srcdir_or_datadir (sz_kernel_main, "/lib/CL/devices", "", "/vortex/kernel_main.c");
std::stringstream ss_cmd, ss_out;
ss_cmd << clang_path.c_str() << " " << build_cflags << " " << sz_program_bc << " " << sz_kernel_main << " " << build_ldflags << " -o " << sz_program_elf;
POCL_MSG_PRINT_LLVM("running \"%s\"\n", ss_cmd.str().c_str());
int err = exec(ss_cmd.str().c_str(), ss_out);
if (err != 0) {
POCL_MSG_ERR("%s\n", ss_out.str().c_str());
return err;
}
}
if (POCL_DEBUGGING_ON) {
std::string objdump_path(LLVM_OBJDUMP);
if (llvm_install_path) {
objdump_path.replace(0, strlen(LLVM_PREFIX), llvm_install_path);
}
std::stringstream ss_cmd, ss_out;
ss_cmd << objdump_path.c_str() << " -D " << sz_program_elf << " > program.dump";
POCL_MSG_PRINT_LLVM("running \"%s\"\n", ss_cmd.str().c_str());
int err = exec(ss_cmd.str().c_str(), ss_out);
if (err != 0) {
POCL_MSG_ERR("%s\n", ss_out.str().c_str());
return err;
}
}
{
std::string vxbintool_path = pocl_get_string_option ("POCL_VORTEX_BINTOOL", "");
if (vxbintool_path == ""){
POCL_MSG_ERR("'POCL_VORTEX_BINTOOL' need to be set\n");
return -1;
}
std::stringstream ss_cmd, ss_out;
ss_cmd << vxbintool_path << " " << sz_program_elf << " " << sz_program_vxbin;
POCL_MSG_PRINT_LLVM("running \"%s\"\n", ss_cmd.str().c_str());
int err = exec(ss_cmd.str().c_str(), ss_out);
if (err != 0) {
POCL_MSG_ERR("%s\n", ss_out.str().c_str());
return err;
}
}
return 0;
}