Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
96a253d
vulkan platform stub
Hugobros3 Apr 1, 2021
7e82fe0
don't use vulkan.hpp
Hugobros3 Apr 12, 2021
2a899e3
validation layers
Hugobros3 Apr 12, 2021
f9990be
create a device
Hugobros3 Apr 12, 2021
f8ebd58
buffer stuff stub
Hugobros3 Apr 12, 2021
0a7ba44
added importing host memory
Hugobros3 Apr 13, 2021
7d1d8cc
copy commands
Hugobros3 Apr 13, 2021
00168eb
load kernel boilerplate
Hugobros3 Apr 14, 2021
bd237ab
factor out single-use cmdbuf creation
Hugobros3 Apr 15, 2021
7fa16ee
load ext functions properly
Hugobros3 Apr 15, 2021
ce5231c
fix memory import (needs testing still)
Hugobros3 Apr 15, 2021
f301c0d
cleanup leftover resources on backend shutdown
Hugobros3 Apr 15, 2021
2aec7e0
initial support for args/bda
Hugobros3 Apr 22, 2021
1966c4d
upgrade BDA to KHR variant :/
Hugobros3 Apr 23, 2021
640c962
making validation happy
Hugobros3 Apr 23, 2021
e37a2a4
derp
Hugobros3 Apr 27, 2021
9f326cb
implement copy to host
Hugobros3 Apr 27, 2021
7829ddc
put kernels in unique_ptrs
Hugobros3 Apr 28, 2021
b164f49
refactored mem management
Hugobros3 Apr 28, 2021
5d32e13
enable some capabilities for debug printf
Hugobros3 Apr 28, 2021
b33b403
working upload/download (using staging bufs)
Hugobros3 Apr 29, 2021
80bb4c3
fixed importing memory
Hugobros3 Apr 29, 2021
79b9f68
cleanup
Hugobros3 May 3, 2021
146fb63
draft for vk intrinsics bindings
Hugobros3 May 5, 2021
6625cd7
fiddling
Hugobros3 May 5, 2021
d4efe54
cute hack arround certain invocation id intrinsics
Hugobros3 May 6, 2021
ca0456b
Handle lack of CUDA devices gracefully
Hugobros3 Dec 1, 2022
e6b895a
added stub for shady runtime
Hugobros3 Dec 1, 2022
3f4d66d
can run a trivial program
Hugobros3 Dec 2, 2022
804caa3
corrected grid size
Hugobros3 Dec 2, 2022
993c768
Merge branch 'master' into vulkan
Hugobros3 Oct 8, 2025
0fa2d56
implement missing methods in VulkanPlatform
Hugobros3 Oct 8, 2025
98ff22c
cmake: add Vulkan platform files
Hugobros3 Oct 9, 2025
1e09aff
wire up vulkan intrinsics based off SPIR-V
Hugobros3 Oct 9, 2025
d19428c
updated shady API and implement more stuff
Hugobros3 Oct 9, 2025
2305ea8
Merge branch 'master' into shady
Hugobros3 Oct 9, 2025
9e0722b
Merge branch 'shady' into vulkan
Hugobros3 Oct 9, 2025
c00bfc7
shady: minor fixes
Hugobros3 Oct 9, 2025
e33fb40
modernizing the old Vulkan runtime
Hugobros3 Oct 17, 2025
1e142cd
vulkan: fix buffers
Hugobros3 Oct 17, 2025
924df20
vulkan: moved memory import code
Hugobros3 Oct 17, 2025
803ed92
vulkan: start drafting non-invasive shady runtime integration
Hugobros3 Oct 17, 2025
fa2fc38
runtime: ask shady to JIT the code
Hugobros3 Oct 22, 2025
0d2c9be
remove shady runner support
Hugobros3 Oct 22, 2025
c56eb53
add more vulkan offloading intrinsics
Hugobros3 Oct 24, 2025
dbad9a4
fix some spirv intrinsics
Hugobros3 Oct 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/anydsl_runtime-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ function(anydsl_runtime_wrap outfiles)
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_amdgpu.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_opencl.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_thorin.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_vulkan.impala
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/runtime.impala
${_additional_platform_files})

Expand Down
5 changes: 4 additions & 1 deletion platforms/artic/intrinsics_spirv.impala
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T;
#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T;

#[import(cc = "device", name = "spirv.global")] fn spirv_make_global_variable[T]() -> T;
#[import(cc = "device", name = "spirv.decorate")] fn spirv_decorate_literal[T](T, u32, u32) -> ();
11 changes: 11 additions & 0 deletions platforms/artic/intrinsics_thorin.impala
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@
#[import(cc = "thorin")] fn amdgpu_hsa(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn levelzero(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
#[import(cc = "thorin")] fn vulkan_cs(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();

struct VulkanOffloadInfo {
// talks to the runtime to setup this pipeline for execution
setup_offloaded_args: fn() -> (),
filename: &[u8],
num_stages: u32,
stages: &[(u32, &[u8])]
}

//#[import(cc = "thorin")] fn vulkan_offload(_num_stages: u32, stages: &[(u32, fn() -> ())]) -> VulkanOffloadInfo;
#[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T];
#[import(cc = "thorin")] fn hls(_dev: i32, _body: fn() -> ()) -> ();
#[import(cc = "thorin", name = "pipeline")] fn thorin_pipeline(_initiation_interval: i32, _lower: i32, _upper: i32, _body: fn(i32) -> ()) -> (); // only for HLS/OpenCL backend
Expand Down
153 changes: 153 additions & 0 deletions platforms/artic/intrinsics_vulkan.impala
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// no declarations are emitted for "device" functions
#[import(cc = "device", name = "barrier")] fn vulkan_barrier(u32) -> ();
#[import(cc = "device", name = "exp")] fn vulkan_expf(f32) -> f32;
#[import(cc = "device", name = "exp2")] fn vulkan_exp2f(f32) -> f32;
#[import(cc = "device", name = "log")] fn vulkan_logf(f32) -> f32;
#[import(cc = "device", name = "log2")] fn vulkan_log2f(f32) -> f32;
#[import(cc = "device", name = "pow")] fn vulkan_powf(f32, f32) -> f32;
#[import(cc = "device", name = "rsqrt")] fn vulkan_rsqrtf(f32) -> f32;
#[import(cc = "device", name = "sqrt")] fn vulkan_sqrtf(f32) -> f32;
#[import(cc = "device", name = "fabs")] fn vulkan_fabsf(f32) -> f32;
#[import(cc = "device", name = "sin")] fn vulkan_sinf(f32) -> f32;
#[import(cc = "device", name = "cos")] fn vulkan_cosf(f32) -> f32;
#[import(cc = "device", name = "tan")] fn vulkan_tanf(f32) -> f32;
#[import(cc = "device", name = "asin")] fn vulkan_asinf(f32) -> f32;
#[import(cc = "device", name = "acos")] fn vulkan_acosf(f32) -> f32;
#[import(cc = "device", name = "atan")] fn vulkan_atanf(f32) -> f32;
#[import(cc = "device", name = "erf")] fn vulkan_erff(f32) -> f32;
#[import(cc = "device", name = "atan2")] fn vulkan_atan2f(f32, f32) -> f32;
#[import(cc = "device", name = "fmod")] fn vulkan_fmodf(f32, f32) -> f32;
#[import(cc = "device", name = "floor")] fn vulkan_floorf(f32) -> f32;
#[import(cc = "device", name = "isinf")] fn vulkan_isinff(f32) -> i32;
#[import(cc = "device", name = "isnan")] fn vulkan_isnanf(f32) -> i32;
#[import(cc = "device", name = "isfinite")] fn vulkan_isfinitef(f32) -> i32;
#[import(cc = "device", name = "fma")] fn vulkan_fmaf(f32, f32, f32) -> f32;
#[import(cc = "device", name = "mad")] fn vulkan_madf(f32, f32, f32) -> f32;
#[import(cc = "device", name = "copysign")] fn vulkan_copysignf(f32, f32) -> f32;
#[import(cc = "device", name = "exp")] fn vulkan_exp(f64) -> f64;
#[import(cc = "device", name = "exp2")] fn vulkan_exp2(f64) -> f64;
#[import(cc = "device", name = "log")] fn vulkan_log(f64) -> f64;
#[import(cc = "device", name = "log2")] fn vulkan_log2(f64) -> f64;
#[import(cc = "device", name = "pow")] fn vulkan_pow(f64, f64) -> f64;
#[import(cc = "device", name = "rsqrt")] fn vulkan_rsqrt(f64) -> f64;
#[import(cc = "device", name = "sqrt")] fn vulkan_sqrt(f64) -> f64;
#[import(cc = "device", name = "fabs")] fn vulkan_fabs(f64) -> f64;
#[import(cc = "device", name = "sin")] fn vulkan_sin(f64) -> f64;
#[import(cc = "device", name = "cos")] fn vulkan_cos(f64) -> f64;
#[import(cc = "device", name = "tan")] fn vulkan_tan(f64) -> f64;
#[import(cc = "device", name = "asin")] fn vulkan_asin(f64) -> f64;
#[import(cc = "device", name = "acos")] fn vulkan_acos(f64) -> f64;
#[import(cc = "device", name = "atan")] fn vulkan_atan(f64) -> f64;
#[import(cc = "device", name = "erf")] fn vulkan_erf(f64) -> f64;
#[import(cc = "device", name = "atan2")] fn vulkan_atan2(f64, f64) -> f64;
#[import(cc = "device", name = "fmod")] fn vulkan_fmod(f64, f64) -> f64;
#[import(cc = "device", name = "floor")] fn vulkan_floor(f64) -> f64;
#[import(cc = "device", name = "isinf")] fn vulkan_isinf(f64) -> i32;
#[import(cc = "device", name = "isnan")] fn vulkan_isnan(f64) -> i32;
#[import(cc = "device", name = "isfinite")] fn vulkan_isfinite(f64) -> i32;
#[import(cc = "device", name = "fma")] fn vulkan_fma(f64, f64, f64) -> f64;
#[import(cc = "device", name = "mad")] fn vulkan_mad(f64, f64, f64) -> f64;
#[import(cc = "device", name = "copysign")] fn vulkan_copysign(f64, f64) -> f64;
#[import(cc = "device", name = "fmin")] fn vulkan_fminf(f32, f32) -> f32;
#[import(cc = "device", name = "fmax")] fn vulkan_fmaxf(f32, f32) -> f32;
#[import(cc = "device", name = "fmin")] fn vulkan_fmin(f64, f64) -> f64;
#[import(cc = "device", name = "fmax")] fn vulkan_fmax(f64, f64) -> f64;
#[import(cc = "device", name = "min")] fn vulkan_min(i32, i32) -> i32;
#[import(cc = "device", name = "max")] fn vulkan_max(i32, i32) -> i32;
#[import(cc = "device", name = "atomic_add")] fn vulkan_atomic_add_global(&mut addrspace(1)i32, i32) -> i32;
#[import(cc = "device", name = "atomic_add")] fn vulkan_atomic_add_shared(&mut addrspace(3)i32, i32) -> i32;
#[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_global(&mut addrspace(1)i32, i32) -> i32;
#[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32;

fn spv_vk_get_num_groups() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](24 /* BuiltInNumWorkgroups */);
fn spv_vk_get_local_size() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](25 /* BuiltInWorkgroupSize */);
fn spv_vk_get_group_id() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](26 /* BuiltInWorkgroupId */);
fn spv_vk_get_local_id() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](27 /* BuiltInLocalInvocationId */);
fn spv_vk_get_global_id() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](28 /* BuiltInGlobalInvocationId */);

fn @vulkan_get_global_size(dim: u32) -> i32 = (spv_vk_get_local_size()(dim) * spv_vk_get_num_groups()(dim)) as i32;

fn @vulkan_accelerator(dev: i32) = Accelerator {
exec = @|body| |grid, block| {
let work_item = WorkItem {
tidx = @|| spv_vk_get_local_id()(0) as i32,
tidy = @|| spv_vk_get_local_id()(1) as i32,
tidz = @|| spv_vk_get_local_id()(2) as i32,
bidx = @|| spv_vk_get_group_id()(0) as i32,
bidy = @|| spv_vk_get_group_id()(1) as i32,
bidz = @|| spv_vk_get_group_id()(2) as i32,
gidx = @|| spv_vk_get_global_id()(0) as i32,
gidy = @|| spv_vk_get_global_id()(1) as i32,
gidz = @|| spv_vk_get_global_id()(2) as i32,
bdimx = @|| spv_vk_get_local_size()(0) as i32,
bdimy = @|| spv_vk_get_local_size()(1) as i32,
bdimz = @|| spv_vk_get_local_size()(2) as i32,
gdimx = @|| vulkan_get_global_size(0) as i32,
gdimy = @|| vulkan_get_global_size(1) as i32,
gdimz = @|| vulkan_get_global_size(2) as i32,
nblkx = @|| spv_vk_get_num_groups()(0) as i32,
nblky = @|| spv_vk_get_num_groups()(1) as i32,
nblkz = @|| spv_vk_get_num_groups()(2) as i32
};
vulkan_cs(dev, grid, block, || @body(work_item))
},
sync = @|| synchronize_vulkan(dev),
alloc = @|size| alloc_vulkan(dev, size),
alloc_unified = @|size| alloc_opencl_unified(dev, size),
barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE),
};

static vulkan_intrinsics = Intrinsics {
expf = vulkan_expf,
exp2f = vulkan_exp2f,
logf = vulkan_logf,
log2f = vulkan_log2f,
powf = vulkan_powf,
rsqrtf = vulkan_rsqrtf,
sqrtf = vulkan_sqrtf,
fabsf = vulkan_fabsf,
sinf = vulkan_sinf,
cosf = vulkan_cosf,
tanf = vulkan_tanf,
asinf = vulkan_asinf,
acosf = vulkan_acosf,
atanf = vulkan_atanf,
erff = vulkan_erff,
atan2f = vulkan_atan2f,
copysignf = vulkan_copysignf,
fmaf = vulkan_fmaf,
fmaxf = vulkan_fmaxf,
fminf = vulkan_fminf,
fmodf = vulkan_fmodf,
floorf = vulkan_floorf,
isinff = vulkan_isinff,
isnanf = vulkan_isnanf,
isfinitef = vulkan_isfinitef,
exp = vulkan_exp,
exp2 = vulkan_exp2,
log = vulkan_log,
log2 = vulkan_log2,
pow = vulkan_pow,
rsqrt = vulkan_rsqrt,
sqrt = vulkan_sqrt,
fabs = vulkan_fabs,
sin = vulkan_sin,
cos = vulkan_cos,
tan = vulkan_tan,
asin = vulkan_asin,
acos = vulkan_acos,
atan = vulkan_atan,
erf = vulkan_erf,
atan2 = vulkan_atan2,
copysign = vulkan_copysign,
fma = vulkan_fma,
fmax = vulkan_fmax,
fmin = vulkan_fmin,
fmod = vulkan_fmod,
floor = vulkan_floor,
isinf = vulkan_isinf,
isnan = vulkan_isnan,
isfinite = vulkan_isfinite,
min = vulkan_min,
max = vulkan_max,
};
4 changes: 4 additions & 0 deletions platforms/artic/runtime.impala
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#[import(cc = "C", name = "anydsl_print_string")] fn print_string(_: &[u8]) -> ();
#[import(cc = "C", name = "anydsl_print_flush")] fn print_flush() -> ();

#[import(cc = "C", name = "anydsl_load_offloaded")] fn runtime_load_offloaded(_device: i32, _filename: &[u8], _name: &[u8], _size: &mut u64) -> &[u8];

// TODO
//struct Buffer[T] {
// data : &mut [T],
Expand Down Expand Up @@ -123,6 +125,8 @@ fn @alloc_levelzero(dev: i32, size: i64) = alloc(runtime_device(5, dev), size);
fn @alloc_levelzero_host(dev: i32, size: i64) = alloc_host(runtime_device(5, dev), size);
fn @alloc_levelzero_unified(dev: i32, size: i64) = alloc_unified(runtime_device(5, dev), size);
fn @synchronize_levelzero(dev: i32) = runtime_synchronize(runtime_device(5, dev));
fn @synchronize_vulkan(dev: i32) = runtime_synchronize(runtime_device(6, dev));
fn @alloc_vulkan(dev: i32, size: i64) = alloc(runtime_device(6, dev), size);

fn @copy(src: Buffer, dst: Buffer) = runtime_copy(src.device, src.data, 0, dst.device, dst.data, 0, src.size);
fn @copy_offset(src: Buffer, off_src: i64, dst: Buffer, off_dst: i64, size: i64) = runtime_copy(src.device, src.data, off_src, dst.device, dst.data, off_dst, size);
Expand Down
10 changes: 10 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@ if(pal_FOUND)
endif()
set(AnyDSL_runtime_HAS_PAL_SUPPORT ${pal_FOUND} CACHE INTERNAL "enables PAL support")

find_package(shady)
find_package(Vulkan)
if(Vulkan_FOUND AND shady_FOUND)
add_library(runtime_vulkan STATIC vulkan_platform.cpp vulkan_platform.h)
target_include_directories(runtime_vulkan PRIVATE ${Vulkan_INCLUDE_DIRS})
target_link_libraries(runtime_vulkan PRIVATE runtime_base ${Vulkan_LIBRARIES} shady::runtime shady::api shady::driver)
list(APPEND RUNTIME_PLATFORMS runtime_vulkan)
endif()
set(AnyDSL_runtime_HAS_Vulkan_SUPPORT ${Vulkan_FOUND} CACHE INTERNAL "enables Vulkan support")

# look for LLVM for nvptx and gcn
find_package(LLVM CONFIG)
if(LLVM_FOUND)
Expand Down
1 change: 1 addition & 0 deletions src/anydsl_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct RuntimeSingleton {
register_hsa_platform(&runtime);
register_pal_platform(&runtime);
register_levelzero_platform(&runtime);
register_vulkan_platform(&runtime);
}

static std::pair<ProfileLevel, ProfileLevel> detect_profile_level() {
Expand Down
3 changes: 2 additions & 1 deletion src/anydsl_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ enum {
ANYDSL_OPENCL = 2,
ANYDSL_HSA = 3,
ANYDSL_PAL = 4,
ANYDSL_LEVELZERO = 5
ANYDSL_LEVELZERO = 5,
ANYDSL_Vulkan = 6
};

AnyDSL_runtime_API void anydsl_info(void);
Expand Down
3 changes: 2 additions & 1 deletion src/anydsl_runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ enum class Platform : int32_t {
OpenCL = ANYDSL_OPENCL,
HSA = ANYDSL_HSA,
PAL = ANYDSL_PAL,
LevelZero = ANYDSL_LEVELZERO
LevelZero = ANYDSL_LEVELZERO,
Vulkan = ANYDSL_Vulkan
};

struct Device {
Expand Down
2 changes: 2 additions & 0 deletions src/anydsl_runtime_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#cmakedefine AnyDSL_runtime_HAS_LEVELZERO_SUPPORT
#cmakedefine AnyDSL_runtime_HAS_HSA_SUPPORT
#cmakedefine AnyDSL_runtime_HAS_PAL_SUPPORT
#cmakedefine AnyDSL_runtime_HAS_SHADY_SUPPORT
#cmakedefine AnyDSL_runtime_HAS_Vulkan_SUPPORT
#cmakedefine AnyDSL_runtime_HAS_TBB_SUPPORT


Expand Down
1 change: 1 addition & 0 deletions src/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ void register_opencl_platform(Runtime*);
void register_hsa_platform(Runtime*);
void register_pal_platform(Runtime*);
void register_levelzero_platform(Runtime*);
void register_vulkan_platform(Runtime*);

/// A runtime platform. Exposes a set of devices, a copy function,
/// and functions to allocate and release memory.
Expand Down
3 changes: 3 additions & 0 deletions src/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ void register_pal_platform(Runtime* runtime) { runtime->register_platform<DummyP
#ifndef AnyDSL_runtime_HAS_LEVELZERO_SUPPORT
void register_levelzero_platform(Runtime* runtime) { runtime->register_platform<DummyPlatform>("Level Zero"); }
#endif
#ifndef AnyDSL_runtime_HAS_Vulkan_SUPPORT
void register_vulkan_platform(Runtime* runtime) { runtime->register_platform<DummyPlatform>("Vulkan"); }
#endif

Runtime::Runtime(std::pair<ProfileLevel, ProfileLevel> profile)
: profile_(profile)
Expand Down
Loading