Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/anydsl_runtime-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ function(anydsl_runtime_wrap outfiles)
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_nvvm.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_amdgpu.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_opencl.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_spirv.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_thorin.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/runtime.impala
${_additional_platform_files})
Expand Down
38 changes: 38 additions & 0 deletions platforms/artic/intrinsics_opencl.impala
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#[import(cc = "device", name = "min")] fn opencl_min(i32, i32) -> i32;
#[import(cc = "device", name = "max")] fn opencl_max(i32, i32) -> i32;
#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_global(&mut addrspace(1)i32, i32) -> i32;
#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_global_f32(&mut addrspace(1)f32, f32) -> f32;
#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_shared(&mut addrspace(3)i32, i32) -> i32;
#[import(cc = "device", name = "atomic_min")] fn opencl_atomic_min_global(&mut addrspace(1)i32, i32) -> i32;
#[import(cc = "device", name = "atomic_min")] fn opencl_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32;
Expand Down Expand Up @@ -100,6 +101,43 @@ fn @opencl_accelerator(dev: i32) = Accelerator {
barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE),
};

fn spv_cl_get_num_groups() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](24 /* BuiltInNumWorkgroups */);
fn spv_cl_get_local_size() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](25 /* BuiltInWorkgroupSize */);
fn spv_cl_get_group_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](26 /* BuiltInWorkgroupId */);
fn spv_cl_get_local_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](27 /* BuiltInLocalInvocationId */);
fn spv_cl_get_global_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](28 /* BuiltInGlobalInvocationId */);
fn spv_cl_get_global_size() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](31 /* BuiltInGlobalSize */);

fn @opencl_spirv_accelerator(dev: i32) = Accelerator {
exec = @|body| |grid, block| {
let work_item = WorkItem {
tidx = @|| spv_cl_get_local_id()(0) as i32,
tidy = @|| spv_cl_get_local_id()(1) as i32,
tidz = @|| spv_cl_get_local_id()(2) as i32,
bidx = @|| spv_cl_get_local_id()(0) as i32,
bidy = @|| spv_cl_get_group_id()(1) as i32,
bidz = @|| spv_cl_get_group_id()(2) as i32,
gidx = @|| spv_cl_get_global_id()(0) as i32,
gidy = @|| spv_cl_get_global_id()(1) as i32,
gidz = @|| spv_cl_get_global_id()(2) as i32,
bdimx = @|| spv_cl_get_local_size()(0) as i32,
bdimy = @|| spv_cl_get_local_size()(1) as i32,
bdimz = @|| spv_cl_get_local_size()(2) as i32,
gdimx = @|| spv_cl_get_global_size()(0) as i32,
gdimy = @|| spv_cl_get_global_size()(1) as i32,
gdimz = @|| spv_cl_get_global_size()(2) as i32,
nblkx = @|| spv_cl_get_num_groups()(0) as i32,
nblky = @|| spv_cl_get_num_groups()(1) as i32,
nblkz = @|| spv_cl_get_num_groups()(2) as i32
};
opencl_spirv(dev, grid, block, || @body(work_item))
},
sync = @|| synchronize_opencl(dev),
alloc = @|size| alloc_opencl(dev, size),
alloc_unified = @|size| alloc_opencl_unified(dev, size),
barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE),
};

static opencl_intrinsics = Intrinsics {
expf = opencl_expf,
exp2f = opencl_exp2f,
Expand Down
1 change: 1 addition & 0 deletions platforms/artic/intrinsics_spirv.impala
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T;
1 change: 1 addition & 0 deletions platforms/artic/intrinsics_thorin.impala
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#[import(cc = "thorin")] fn cuda(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn nvvm(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn opencl(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn opencl_spirv(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn amdgpu_hsa(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T];
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ if(RUNTIME_JIT)
../platforms/${frontend}/intrinsics_nvvm.impala
../platforms/${frontend}/intrinsics_amdgpu.impala
../platforms/${frontend}/intrinsics_opencl.impala
../platforms/${frontend}/intrinsics_spirv.impala
../platforms/${frontend}/intrinsics_thorin.impala
../platforms/${frontend}/intrinsics.impala
../platforms/${frontend}/runtime.impala)
Expand Down
65 changes: 44 additions & 21 deletions src/opencl_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,25 +357,32 @@ void time_kernel_callback(cl_event event, cl_int, void* data) {
CHECK_OPENCL(err, "clReleaseEvent()");
}

static inline bool ends_with(std::string_view str, std::string_view suffix) {
if (str.size() < suffix.size())
return false;
return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}

void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_params) {
if (devices_[dev].is_intel_fpga && launch_params.num_args == 0) {
debug("processing by autorun kernel");
return;
}

auto kernel = load_kernel(dev, launch_params.file_name, launch_params.kernel_name);
bool is_spirv = ends_with(launch_params.file_name, ".spv");

// set up arguments
std::vector<cl_mem> kernel_structs(launch_params.num_args);
std::vector<cl_mem> kernel_structs;
for (uint32_t i = 0; i < launch_params.num_args; i++) {
if (launch_params.args.types[i] == KernelArgType::Struct) {
if (!is_spirv && launch_params.args.types[i] == KernelArgType::Struct) {
// create a buffer for each structure argument
cl_int err = CL_SUCCESS;
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
cl_mem struct_buf = clCreateBuffer(devices_[dev].ctx, flags, launch_params.args.sizes[i], launch_params.args.data[i], &err);
CHECK_OPENCL(err, "clCreateBuffer()");
kernel_structs[i] = struct_buf;
clSetKernelArg(kernel, i, sizeof(cl_mem), &kernel_structs[i]);
kernel_structs.push_back(struct_buf);
clSetKernelArg(kernel, i, sizeof(cl_mem), &struct_buf);
} else {
#ifdef CL_VERSION_2_0
if (launch_params.args.types[i] == KernelArgType::Ptr && devices_[dev].version_major == 2) {
Expand Down Expand Up @@ -421,11 +428,9 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para
dynamic_profile(dev, launch_params.file_name);

// release temporary buffers for struct arguments
for (uint32_t i = 0; i < launch_params.num_args; i++) {
if (launch_params.args.types[i] == KernelArgType::Struct) {
cl_int err = clReleaseMemObject(kernel_structs[i]);
CHECK_OPENCL(err, "clReleaseMemObject()");
}
for (auto tmp : kernel_structs) {
cl_int err = clReleaseMemObject(tmp);
CHECK_OPENCL(err, "clReleaseMemObject()");
}
}

Expand Down Expand Up @@ -515,6 +520,21 @@ cl_program OpenCLPlatform::load_program_binary(DeviceId dev, const std::string&
return program;
}

cl_program OpenCLPlatform::load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const {
#if CL_VERSION_2_1
const size_t program_length = program_string.length();
const char* program_c_str = program_string.c_str();
cl_int err = CL_SUCCESS;
cl_program program = clCreateProgramWithIL(devices_[dev].ctx, (const void*)program_c_str, program_length, &err);
CHECK_OPENCL(err, "clCreateProgramWithIL()");
debug("Loading IL '%' for OpenCL device %", filename, dev);

return program;
#else
error("OpenCL 2.1 or later is required for SPIR-V support.");
#endif
}

cl_program OpenCLPlatform::load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const {
const size_t program_length = program_string.length();
const char* program_c_str = program_string.c_str();
Expand Down Expand Up @@ -589,25 +609,28 @@ cl_kernel OpenCLPlatform::load_kernel(DeviceId dev, const std::string& filename,
if (prog_it == prog_cache.end()) {
opencl_dev.unlock();

if (canonical.extension() != ".cl")
error("Incorrect extension for kernel file '%' (should be '.cl')", canonical.string());

// load file from disk or cache
auto src_path = canonical;
if (opencl_dev.is_intel_fpga)
src_path.replace_extension(".aocx");
std::string src_code = runtime_->load_file(src_path.string());

// compile src or load from cache
std::string bin = opencl_dev.is_intel_fpga ? src_code : runtime_->load_from_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code);
if (bin.empty()) {
program = load_program_source(dev, src_path.string(), src_code);
if (canonical.extension() == ".spv") {
program = load_program_il(dev, src_path.string(), src_code);
program = compile_program(dev, program, src_path.string());
runtime_->store_to_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code, program_as_string(program));
} else {
program = load_program_binary(dev, src_path.string(), bin);
program = compile_program(dev, program, src_path.string());
}
} else if (canonical.extension() == ".cl") {
// compile src or load from cache
std::string bin = opencl_dev.is_intel_fpga ? src_code : runtime_->load_from_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code);
if (bin.empty()) {
program = load_program_source(dev, src_path.string(), src_code);
program = compile_program(dev, program, src_path.string());
runtime_->store_to_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code, program_as_string(program));
} else {
program = load_program_binary(dev, src_path.string(), bin);
program = compile_program(dev, program, src_path.string());
}
} else
error("Incorrect extension for kernel file '%' (should be '.cl' or .'spv')", canonical.string());

opencl_dev.lock();
prog_cache[canonical.string()] = program;
Expand Down
1 change: 1 addition & 0 deletions src/opencl_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class OpenCLPlatform : public Platform {

cl_kernel load_kernel(DeviceId dev, const std::string& filename, const std::string& kernelname);
cl_program load_program_binary(DeviceId dev, const std::string& filename, const std::string& program_string) const;
cl_program load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const;
cl_program load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const;
cl_program compile_program(DeviceId dev, cl_program program, const std::string& filename) const;

Expand Down