From 96a253d619ed4891c32b7c6e4c8cbadde26d611d Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 1 Apr 2021 16:54:55 +0200 Subject: [PATCH 01/43] vulkan platform stub --- src/CMakeLists.txt | 9 +++++ src/anydsl_runtime.cpp | 1 + src/anydsl_runtime_config.h.in | 1 + src/platform.h | 1 + src/runtime.cpp | 3 ++ src/vulkan_platform.cpp | 62 ++++++++++++++++++++++++++++++++++ src/vulkan_platform.h | 43 +++++++++++++++++++++++ 7 files changed, 120 insertions(+) create mode 100644 src/vulkan_platform.cpp create mode 100644 src/vulkan_platform.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2653328b..40455c6e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -81,6 +81,15 @@ if(HSA_FOUND) endif() set(AnyDSL_runtime_HAS_HSA_SUPPORT ${HSA_FOUND} CACHE INTERNAL "enables HSA support") +find_package(Vulkan) +if(Vulkan_FOUND) + add_library(runtime_vulkan STATIC vulkan_platform.cpp vulkan_platform.h) + target_include_directories(runtime_vulkan PRIVATE ${Vulkan_INCLUDE_DIRS}) + target_link_libraries(runtime_vulkan PRIVATE runtime_base ${Vulkan_LIBRARIES}) + list(APPEND RUNTIME_PLATFORMS runtime_vulkan) +endif() +set(AnyDSL_runtime_HAS_Vulkan_SUPPORT ${Vulkan_FOUND} CACHE INTERNAL "enables Vulkan support") + # look for LLVM for nvptx and gcn find_package(LLVM) if(LLVM_FOUND) diff --git a/src/anydsl_runtime.cpp b/src/anydsl_runtime.cpp index c9759abc..1a45340b 100644 --- a/src/anydsl_runtime.cpp +++ b/src/anydsl_runtime.cpp @@ -32,6 +32,7 @@ struct RuntimeSingleton { register_cuda_platform(&runtime); register_opencl_platform(&runtime); register_hsa_platform(&runtime); + register_vulkan_platform(&runtime); } static ProfileLevel detect_profile_level() { diff --git a/src/anydsl_runtime_config.h.in b/src/anydsl_runtime_config.h.in index 199a5a10..77fac361 100644 --- a/src/anydsl_runtime_config.h.in +++ b/src/anydsl_runtime_config.h.in @@ -9,6 +9,7 @@ #cmakedefine AnyDSL_runtime_HAS_CUDA_SUPPORT #cmakedefine AnyDSL_runtime_HAS_OPENCL_SUPPORT #cmakedefine AnyDSL_runtime_HAS_HSA_SUPPORT +#cmakedefine AnyDSL_runtime_HAS_Vulkan_SUPPORT #cmakedefine AnyDSL_runtime_HAS_TBB_SUPPORT diff --git a/src/platform.h b/src/platform.h index 2e2720a2..86fa2401 100644 --- a/src/platform.h +++ b/src/platform.h @@ -12,6 +12,7 @@ void register_cpu_platform(Runtime*); void register_cuda_platform(Runtime*); void register_opencl_platform(Runtime*); void register_hsa_platform(Runtime*); +void register_vulkan_platform(Runtime*); /// A runtime platform. Exposes a set of devices, a copy function, /// and functions to allocate and release memory. diff --git a/src/runtime.cpp b/src/runtime.cpp index d0528e5e..83e02ec3 100644 --- a/src/runtime.cpp +++ b/src/runtime.cpp @@ -17,6 +17,9 @@ void register_opencl_platform(Runtime* runtime) { runtime->register_platformregister_platform("HSA"); } #endif +#ifndef AnyDSL_runtime_HAS_Vulkan_SUPPORT +void register_vulkan_platform(Runtime* runtime) { runtime->register_platform("Vulkan"); } +#endif Runtime::Runtime(ProfileLevel profile) : profile_(profile) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp new file mode 100644 index 00000000..e82ae010 --- /dev/null +++ b/src/vulkan_platform.cpp @@ -0,0 +1,62 @@ +#include "vulkan_platform.h" + +VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { + instance = vk::createInstance({}); + devices = instance.enumeratePhysicalDevices(); + debug("Available Vulkan physical devices: "); + size_t i = 0; + for (auto& dev : devices) { + auto properties = dev.getProperties(); + debug(" GPU%:", i++); + debug(" Device name: %", properties.deviceName); + debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion), VK_VERSION_PATCH(properties.apiVersion)); + } +} + +VulkanPlatform::~VulkanPlatform() { + instance.destroy(); +} + +void *VulkanPlatform::alloc(DeviceId dev, int64_t size) { + return nullptr; +} + +void *VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { + return nullptr; +} + +void *VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { + return nullptr; +} + +void VulkanPlatform::release(DeviceId dev, void *ptr) { + +} + +void VulkanPlatform::release_host(DeviceId dev, void *ptr) { + +} + +void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { + +} + +void VulkanPlatform::synchronize(DeviceId dev) { + +} + +void VulkanPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { + +} + +void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { + +} + +void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { + +} + +void register_vulkan_platform(Runtime* runtime) { + runtime->register_platform(); +} diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h new file mode 100644 index 00000000..0ae3ac2b --- /dev/null +++ b/src/vulkan_platform.h @@ -0,0 +1,43 @@ +#ifndef ANYDSL_RUNTIME_VULKAN_PLATFORM_H +#define ANYDSL_RUNTIME_VULKAN_PLATFORM_H + +#include "platform.h" +#include + +class VulkanPlatform : public Platform { +public: + VulkanPlatform(Runtime* runtime); + ~VulkanPlatform() override; + +protected: + void *alloc(DeviceId dev, int64_t size) override; + void *alloc_host(DeviceId dev, int64_t size) override; + void *alloc_unified(DeviceId dev, int64_t size) override { command_unavailable("alloc_unified"); } + + void *get_device_ptr(DeviceId dev, void *ptr) override; + + void release(DeviceId dev, void *ptr) override; + + void release_host(DeviceId dev, void *ptr) override; + + void launch_kernel(DeviceId dev, const LaunchParams &launch_params) override; + + void synchronize(DeviceId dev) override; + + void copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, + int64_t size) override; + + void copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, + int64_t size) override; + + void copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, + int64_t size) override; + + size_t dev_count() const override { return devices.size(); } + std::string name() const override { return "Vulkan"; } + + vk::Instance instance; + std::vector devices; +}; + +#endif From 7e82fe015915e1519f03504827b10fdaced7c908 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Mon, 12 Apr 2021 14:23:37 +0200 Subject: [PATCH 02/43] don't use vulkan.hpp --- platforms/artic/intrinsics_thorin.impala | 10 +---- src/vulkan_platform.cpp | 51 +++++++++++++++++++++--- src/vulkan_platform.h | 20 ++++++++-- 3 files changed, 62 insertions(+), 19 deletions(-) diff --git a/platforms/artic/intrinsics_thorin.impala b/platforms/artic/intrinsics_thorin.impala index 262e7fb6..e5c3d021 100644 --- a/platforms/artic/intrinsics_thorin.impala +++ b/platforms/artic/intrinsics_thorin.impala @@ -14,6 +14,7 @@ #[import(cc = "thorin")] fn nvvm(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn opencl(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn amdgpu(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); +#[import(cc = "thorin")] fn spirv(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T]; #[import(cc = "thorin")] fn hls(_body: fn() -> ()) -> (); #[import(cc = "thorin", name = "pipeline")] fn thorin_pipeline(_initiation_interval: i32, _lower: i32, _upper: i32, _body: fn(i32) -> ()) -> (); // only for HLS/OpenCL backend @@ -34,12 +35,3 @@ fn spawn(body: fn() -> ()) = @|| thorin_spawn(body); fn @pipeline(body: fn (i32) -> ()) = @|initiation_interval: i32, lower: i32, upper: i32| thorin_pipeline(initiation_interval, lower, upper, body); fn @parallel(body: fn (i32) -> ()) = @|num_threads: i32, lower: i32, upper: i32| thorin_parallel(num_threads, lower, upper, body); - -// intrinsics for flow graphs -struct FlowGraph { empty: i32 } -struct FlowTask { empty: i32 } -#[import(cc = "thorin", name = "anydsl_create_graph")] fn create_graph() -> FlowGraph; -#[import(cc = "thorin", name = "anydsl_create_task")] fn thorin_create_task(_graph: FlowGraph, _body: fn() -> ()) -> FlowTask; -#[import(cc = "thorin", name = "anydsl_create_edge")] fn create_edge(_src: FlowTask, _dst: FlowTask) -> (); -#[import(cc = "thorin", name = "anydsl_execute_graph")] fn execute_flowgraph(_graph: FlowGraph, _task: FlowTask) -> (); -fn create_task(body: fn() -> ()) = @|graph: FlowGraph| thorin_create_task(graph, body); diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index e82ae010..2cfe9fc2 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -1,20 +1,59 @@ #include "vulkan_platform.h" VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { - instance = vk::createInstance({}); - devices = instance.enumeratePhysicalDevices(); + std::vector enabled_layers; + std::vector enabled_instance_extensions; + auto app_info = VkApplicationInfo { + .pApplicationName = "AnyDSL Runtime" + }; + auto create_info = VkInstanceCreateInfo { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .pApplicationInfo = &app_info, + .enabledLayerCount = (uint32_t) enabled_layers.size(), + .ppEnabledLayerNames = enabled_layers.data(), + .enabledExtensionCount = (uint32_t) enabled_instance_extensions.size(), + .ppEnabledExtensionNames = enabled_instance_extensions.data(), + }; + vkCreateInstance(&create_info, nullptr, &instance); + + uint32_t physical_devices_count; + vkEnumeratePhysicalDevices(instance, &physical_devices_count, nullptr); + physical_devices.resize(physical_devices_count); + vkEnumeratePhysicalDevices(instance, &physical_devices_count, physical_devices.data()); + debug("Available Vulkan physical devices: "); size_t i = 0; - for (auto& dev : devices) { - auto properties = dev.getProperties(); - debug(" GPU%:", i++); + for (auto& dev : physical_devices) { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(dev, &properties); + debug(" GPU%:", i); debug(" Device name: %", properties.deviceName); debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion), VK_VERSION_PATCH(properties.apiVersion)); + + usable_devices.emplace_back(std::make_unique(*this, dev, i)); + i++; } } VulkanPlatform::~VulkanPlatform() { - instance.destroy(); + vkDestroyInstance(instance, nullptr); +} + +VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t i) + : platform(platform), physical_device(physical_device), i(i) { + + /*auto create_info = VkDeviceCreateInfo { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + + }; + vkCreateDevice(physical_device, &create_info, nullptr, &device);*/ +} + +VulkanPlatform::Device::~Device() { + //vkDestroyDevice(device, nullptr); } void *VulkanPlatform::alloc(DeviceId dev, int64_t size) { diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 0ae3ac2b..f1b708b8 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -2,7 +2,7 @@ #define ANYDSL_RUNTIME_VULKAN_PLATFORM_H #include "platform.h" -#include +#include class VulkanPlatform : public Platform { public: @@ -33,11 +33,23 @@ class VulkanPlatform : public Platform { void copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) override; - size_t dev_count() const override { return devices.size(); } + size_t dev_count() const override { return physical_devices.size(); } std::string name() const override { return "Vulkan"; } - vk::Instance instance; - std::vector devices; + struct Device { + VulkanPlatform& platform; + VkPhysicalDevice physical_device; + size_t i; + + VkDevice device; + + Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t i); + ~Device(); + }; + + VkInstance instance; + std::vector physical_devices; + std::vector> usable_devices; }; #endif From 2a899e34b153d197a1a2aa5f8d2c68c25ba1d030 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Mon, 12 Apr 2021 14:42:59 +0200 Subject: [PATCH 03/43] validation layers --- src/vulkan_platform.cpp | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 2cfe9fc2..995cebd5 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -1,8 +1,45 @@ #include "vulkan_platform.h" +const auto khr_validation = "VK_LAYER_KHRONOS_validation"; + +inline std::vector query_layers_available() { + uint32_t count; + vkEnumerateInstanceLayerProperties(&count, nullptr); + std::vector layers(count); + vkEnumerateInstanceLayerProperties(&count, layers.data()); + return layers; +} + +inline std::vector query_extensions_available() { + uint32_t count; + vkEnumerateInstanceExtensionProperties(nullptr, &count, nullptr); + std::vector exts(count); + vkEnumerateInstanceExtensionProperties(nullptr, &count, exts.data()); + return exts; +} + VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { + auto available_layers = query_layers_available(); + auto available_instance_extensions = query_extensions_available(); + std::vector enabled_layers; std::vector enabled_instance_extensions; + + bool should_enable_validation = true; +#ifdef NDEBUG + should_enable_validation = false; +#endif + if (should_enable_validation) { + for (auto& layer : available_layers) { + if (strcmp(khr_validation, layer.layerName) == 0) { + enabled_layers.push_back(khr_validation); + goto validation_done; + } + } + info("Warning: validation enabled but layers not present"); + } + validation_done: + auto app_info = VkApplicationInfo { .pApplicationName = "AnyDSL Runtime" }; @@ -42,7 +79,6 @@ VulkanPlatform::~VulkanPlatform() { VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t i) : platform(platform), physical_device(physical_device), i(i) { - /*auto create_info = VkDeviceCreateInfo { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = nullptr, From f9990bea164ad073fd1ea487b223b71d94470d4b Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Mon, 12 Apr 2021 15:29:04 +0200 Subject: [PATCH 04/43] create a device --- src/vulkan_platform.cpp | 81 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 995cebd5..b1a599df 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -2,6 +2,12 @@ const auto khr_validation = "VK_LAYER_KHRONOS_validation"; +#define CHECK(stuff) { \ + auto rslt = stuff; \ + if (rslt != VK_SUCCESS) \ + error("error, failed %", #stuff); \ +} + inline std::vector query_layers_available() { uint32_t count; vkEnumerateInstanceLayerProperties(&count, nullptr); @@ -62,34 +68,89 @@ VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { debug("Available Vulkan physical devices: "); size_t i = 0; for (auto& dev : physical_devices) { - VkPhysicalDeviceProperties properties; - vkGetPhysicalDeviceProperties(dev, &properties); - debug(" GPU%:", i); - debug(" Device name: %", properties.deviceName); - debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion), VK_VERSION_PATCH(properties.apiVersion)); - usable_devices.emplace_back(std::make_unique(*this, dev, i)); i++; } + debug("Vulkan platform successfully initialized"); } VulkanPlatform::~VulkanPlatform() { + usable_devices.clear(); vkDestroyInstance(instance, nullptr); } VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t i) : platform(platform), physical_device(physical_device), i(i) { - /*auto create_info = VkDeviceCreateInfo { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(physical_device, &properties); + debug(" GPU%:", i); + debug(" Device name: %", properties.deviceName); + debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion), VK_VERSION_PATCH(properties.apiVersion)); + + uint32_t exts_count; + vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); + std::vector available_device_extensions(exts_count); + vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, available_device_extensions.data()); + std::vector enabled_instance_extensions; + + uint32_t queue_families_count; + vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, nullptr); + std::vector queue_families(queue_families_count); + vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, queue_families.data()); + int compute_queue = -1; + int q = 0; + for (auto& queue_f : queue_families) { + bool has_gfx = (queue_f.queueFlags & 0x00000001) != 0; + bool has_compute = (queue_f.queueFlags & 0x00000002) != 0; + bool has_xfer = (queue_f.queueFlags & 0x00000004) != 0; + bool has_sparse = (queue_f.queueFlags & 0x00000008) != 0; + bool has_protected = (queue_f.queueFlags & 0x00000010) != 0; + /*debug("queue %", q); + debug("has_gfx %", has_gfx); + debug("has_compute %", has_compute); + debug("has_xfer %", has_xfer); + debug("has_sparse %", has_sparse); + debug("has_protected %", has_protected);*/ + + // TODO perform this intelligently + if (compute_queue == -1 && has_compute) + compute_queue = q; + q++; + } + std::vector queue_create_infos; + float priority = 1.0f; + if (compute_queue != -1) { + queue_create_infos.push_back(VkDeviceQueueCreateInfo { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queueFamilyIndex = (uint32_t) compute_queue, + .queueCount = 1, + .pQueuePriorities = &priority + }); + } else { + assert(false && "unsuitable device"); + } + + auto enabled_features = VkPhysicalDeviceFeatures {}; + + auto create_info = VkDeviceCreateInfo { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = nullptr, .flags = 0, - + .queueCreateInfoCount = (uint32_t) queue_create_infos.size(), + .pQueueCreateInfos = queue_create_infos.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = (uint32_t) enabled_instance_extensions.size(), + .ppEnabledExtensionNames = enabled_instance_extensions.data(), + .pEnabledFeatures = &enabled_features }; - vkCreateDevice(physical_device, &create_info, nullptr, &device);*/ + CHECK(vkCreateDevice(physical_device, &create_info, nullptr, &device)); } VulkanPlatform::Device::~Device() { - //vkDestroyDevice(device, nullptr); + vkDestroyDevice(device, nullptr); } void *VulkanPlatform::alloc(DeviceId dev, int64_t size) { From f8ebd5845bdeabd3b6d4ce4d550cd9af8c207b92 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Mon, 12 Apr 2021 19:04:26 +0200 Subject: [PATCH 05/43] buffer stuff stub --- src/vulkan_platform.cpp | 111 ++++++++++++++++++++++++++++++++-------- src/vulkan_platform.h | 31 +++++++++-- 2 files changed, 117 insertions(+), 25 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index b1a599df..a7d1905e 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -79,13 +79,13 @@ VulkanPlatform::~VulkanPlatform() { vkDestroyInstance(instance, nullptr); } -VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t i) - : platform(platform), physical_device(physical_device), i(i) { - VkPhysicalDeviceProperties properties; - vkGetPhysicalDeviceProperties(physical_device, &properties); - debug(" GPU%:", i); - debug(" Device name: %", properties.deviceName); - debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion), VK_VERSION_PATCH(properties.apiVersion)); +VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id) + : platform(platform), physical_device(physical_device), device_id(device_id) { + VkPhysicalDeviceProperties device_properties; + vkGetPhysicalDeviceProperties(physical_device, &device_properties); + debug(" GPU%:", device_id); + debug(" Device name: %", device_properties.deviceName); + debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(device_properties.apiVersion), VK_VERSION_MINOR(device_properties.apiVersion), VK_VERSION_PATCH(device_properties.apiVersion)); uint32_t exts_count; vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); @@ -105,12 +105,6 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic bool has_xfer = (queue_f.queueFlags & 0x00000004) != 0; bool has_sparse = (queue_f.queueFlags & 0x00000008) != 0; bool has_protected = (queue_f.queueFlags & 0x00000010) != 0; - /*debug("queue %", q); - debug("has_gfx %", has_gfx); - debug("has_compute %", has_compute); - debug("has_xfer %", has_xfer); - debug("has_sparse %", has_sparse); - debug("has_protected %", has_protected);*/ // TODO perform this intelligently if (compute_queue == -1 && has_compute) @@ -150,27 +144,94 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic } VulkanPlatform::Device::~Device() { - vkDestroyDevice(device, nullptr); + if (device != nullptr) + vkDestroyDevice(device, nullptr); } -void *VulkanPlatform::alloc(DeviceId dev, int64_t size) { - return nullptr; +uint32_t VulkanPlatform::Device::find_suitable_memory_type(VkMemoryRequirements requirements) { + VkPhysicalDeviceMemoryProperties device_memory_properties; + vkGetPhysicalDeviceMemoryProperties(physical_device, &device_memory_properties); + for (size_t bit = 0; bit < 32; bit++) { + auto& memory_type = device_memory_properties.memoryTypes[bit]; + auto& memory_heap = device_memory_properties.memoryHeaps[memory_type.heapIndex]; + + bool is_device_local = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; + + if ((requirements.memoryTypeBits & (1 << bit)) != 0) { + if (is_device_local) + return bit; + } + } + assert(false && "Unable to find a suitable memory type"); +} + +void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { + auto& device = usable_devices[dev]; + + auto buffer_create_info = VkBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + VkBuffer buffer; + vkCreateBuffer(device->device, &buffer_create_info, nullptr, &buffer); + + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(device->device, buffer, &memory_requirements); + + auto allocation_info = VkMemoryAllocateInfo { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! + .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements), + }; + VkDeviceMemory memory; + vkAllocateMemory(device->device, &allocation_info, nullptr, &memory); + + vkBindBufferMemory(device->device, buffer, memory, 0); + size_t id = device->next_resource_id++; + + std::unique_ptr res_buffer = std::make_unique(*device); + res_buffer->alloc = memory; + res_buffer->id = id; + res_buffer->buffer = buffer; + device->resources.push_back(std::move(res_buffer)); + + return reinterpret_cast(id); } -void *VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { - return nullptr; +void* VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { + command_unavailable("alloc_host"); } -void *VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { - return nullptr; +void* VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { + command_unavailable("get_device_ptr"); } void VulkanPlatform::release(DeviceId dev, void *ptr) { + if (ptr == nullptr) + return; + auto& device = usable_devices[dev]; + size_t id = reinterpret_cast(ptr); + size_t i = 0; + for (auto& resource : device->resources) { + if (resource->id == id) { + device->resources.erase(device->resources.begin() + i); + return; + } + i++; + } + assert(false && "Could not find such a buffer to release"); } void VulkanPlatform::release_host(DeviceId dev, void *ptr) { - + command_unavailable("release_host"); } void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { @@ -196,3 +257,11 @@ void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t off void register_vulkan_platform(Runtime* runtime) { runtime->register_platform(); } + +VulkanPlatform::Resource::~Resource() { + vkFreeMemory(device.device, alloc, nullptr); +} + +VulkanPlatform::Buffer::~Buffer() { + vkDestroyBuffer(device.device, buffer, nullptr); +} diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index f1b708b8..857f2fe5 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -33,18 +33,41 @@ class VulkanPlatform : public Platform { void copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) override; - size_t dev_count() const override { return physical_devices.size(); } + size_t dev_count() const override { return usable_devices.size(); } std::string name() const override { return "Vulkan"; } + struct Device; + + struct Resource { + public: + Device& device; + size_t id; + VkDeviceMemory alloc; + + Resource(Device& device) : device(device) {} + virtual ~Resource(); + }; + + struct Buffer : public Resource { + VkBuffer buffer; + + Buffer(Device& device) : Resource(device) {} + ~Buffer() override; + }; + struct Device { VulkanPlatform& platform; VkPhysicalDevice physical_device; - size_t i; + size_t device_id; + VkDevice device = nullptr; - VkDevice device; + std::vector> resources; + size_t next_resource_id = 1; // resource id 0 is reserved - Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t i); + Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); + + uint32_t find_suitable_memory_type(VkMemoryRequirements requirements); }; VkInstance instance; From 0a7ba44507a02a5986754b6d0a0c71fbfce3fb26 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Tue, 13 Apr 2021 13:04:02 +0200 Subject: [PATCH 06/43] added importing host memory --- src/vulkan_platform.cpp | 74 ++++++++++++++++++++++++++++++++++++----- src/vulkan_platform.h | 5 ++- 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index a7d1905e..2191cc77 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -29,7 +29,9 @@ VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { auto available_instance_extensions = query_extensions_available(); std::vector enabled_layers; - std::vector enabled_instance_extensions; + std::vector enabled_instance_extensions { + "VK_KHR_external_memory_capabilities" + }; bool should_enable_validation = true; #ifdef NDEBUG @@ -47,7 +49,8 @@ VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { validation_done: auto app_info = VkApplicationInfo { - .pApplicationName = "AnyDSL Runtime" + .pApplicationName = "AnyDSL Runtime", + .apiVersion = VK_API_VERSION_1_2, }; auto create_info = VkInstanceCreateInfo { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, @@ -81,17 +84,34 @@ VulkanPlatform::~VulkanPlatform() { VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id) : platform(platform), physical_device(physical_device), device_id(device_id) { - VkPhysicalDeviceProperties device_properties; - vkGetPhysicalDeviceProperties(physical_device, &device_properties); + auto external_memory_host_properties = VkPhysicalDeviceExternalMemoryHostPropertiesEXT { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + .pNext = nullptr, + .minImportedHostPointerAlignment = 0xDEADBEEF, + }; + auto device_properties2 = VkPhysicalDeviceProperties2 { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &external_memory_host_properties, + }; + vkGetPhysicalDeviceProperties2(physical_device, &device_properties2); + auto& device_properties = device_properties2.properties; + debug(" GPU%:", device_id); debug(" Device name: %", device_properties.deviceName); debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(device_properties.apiVersion), VK_VERSION_MINOR(device_properties.apiVersion), VK_VERSION_PATCH(device_properties.apiVersion)); + min_imported_host_ptr_alignment = external_memory_host_properties.minImportedHostPointerAlignment; + debug("Min imported host ptr alignment: %", min_imported_host_ptr_alignment); + if (min_imported_host_ptr_alignment == 0xDEADBEEF) + error("Device does not report minimum host pointer alignment"); + uint32_t exts_count; vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); std::vector available_device_extensions(exts_count); vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, available_device_extensions.data()); - std::vector enabled_instance_extensions; + std::vector enabled_instance_extensions { + "VK_EXT_external_memory_host" + }; uint32_t queue_families_count; vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, nullptr); @@ -148,7 +168,7 @@ VulkanPlatform::Device::~Device() { vkDestroyDevice(device, nullptr); } -uint32_t VulkanPlatform::Device::find_suitable_memory_type(VkMemoryRequirements requirements) { +uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits) { VkPhysicalDeviceMemoryProperties device_memory_properties; vkGetPhysicalDeviceMemoryProperties(physical_device, &device_memory_properties); for (size_t bit = 0; bit < 32; bit++) { @@ -157,7 +177,7 @@ uint32_t VulkanPlatform::Device::find_suitable_memory_type(VkMemoryRequirements bool is_device_local = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; - if ((requirements.memoryTypeBits & (1 << bit)) != 0) { + if ((memory_type_bits & (1 << bit)) != 0) { if (is_device_local) return bit; } @@ -188,7 +208,7 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = nullptr, .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! - .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements), + .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements.memoryTypeBits), }; VkDeviceMemory memory; vkAllocateMemory(device->device, &allocation_info, nullptr, &memory); @@ -242,12 +262,50 @@ void VulkanPlatform::synchronize(DeviceId dev) { } +VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { + VkExternalMemoryHandleTypeFlagBits handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + + // Align stuff + size_t mask = !(min_imported_host_ptr_alignment - 1); + size_t host_ptr = (size_t)ptr; + size_t aligned_host_ptr = host_ptr & mask; + + size_t end = host_ptr + size; + size_t aligned_end = ((end + min_imported_host_ptr_alignment - 1) / min_imported_host_ptr_alignment) * min_imported_host_ptr_alignment; + size_t aligned_size = aligned_end - aligned_host_ptr; + + // Find the corresponding device memory type index + VkMemoryHostPointerPropertiesEXT host_ptr_properties; + vkGetMemoryHostPointerPropertiesEXT(device, handle_type, (void*)aligned_host_ptr, &host_ptr_properties); + uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits); + + // Import memory + auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .pNext = nullptr, + .handleType = handle_type, + .pHostPointer = (void*) aligned_host_ptr, + }; + auto allocation_info = VkMemoryAllocateInfo { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &import_ptr_info, + .allocationSize = (VkDeviceSize) aligned_size, + .memoryTypeIndex = memory_type + }; + VkDeviceMemory imported_memory; + CHECK(vkAllocateMemory(device, &allocation_info, nullptr, &imported_memory)); + return imported_memory; +} + void VulkanPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { } void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { + auto& device = usable_devices[dev_dst]; + size_t host_ptr = (size_t)src + offset_src; + VkDeviceMemory imported_memory = device->import_host_memory((void*)host_ptr, size); } void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 857f2fe5..b4c0349c 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -61,13 +61,16 @@ class VulkanPlatform : public Platform { size_t device_id; VkDevice device = nullptr; + size_t min_imported_host_ptr_alignment; + std::vector> resources; size_t next_resource_id = 1; // resource id 0 is reserved Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); - uint32_t find_suitable_memory_type(VkMemoryRequirements requirements); + uint32_t find_suitable_memory_type(uint32_t memory_type_bits); + VkDeviceMemory import_host_memory(void* ptr, size_t size); }; VkInstance instance; From 7d1d8cc541c06a7d3e7277f4704d88b362256758 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Tue, 13 Apr 2021 15:44:59 +0200 Subject: [PATCH 07/43] copy commands --- src/vulkan_platform.cpp | 112 ++++++++++++++++++++++++++++++++++++---- src/vulkan_platform.h | 6 +++ 2 files changed, 109 insertions(+), 9 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 2191cc77..83a16a7c 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -117,7 +117,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, nullptr); std::vector queue_families(queue_families_count); vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, queue_families.data()); - int compute_queue = -1; + int compute_queue_family = -1; int q = 0; for (auto& queue_f : queue_families) { bool has_gfx = (queue_f.queueFlags & 0x00000001) != 0; @@ -127,20 +127,20 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic bool has_protected = (queue_f.queueFlags & 0x00000010) != 0; // TODO perform this intelligently - if (compute_queue == -1 && has_compute) - compute_queue = q; + if (compute_queue_family == -1 && has_compute) + compute_queue_family = q; q++; } std::vector queue_create_infos; - float priority = 1.0f; - if (compute_queue != -1) { + float one = 1.0f; + if (compute_queue_family != -1) { queue_create_infos.push_back(VkDeviceQueueCreateInfo { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .queueFamilyIndex = (uint32_t) compute_queue, + .queueFamilyIndex = (uint32_t) compute_queue_family, .queueCount = 1, - .pQueuePriorities = &priority + .pQueuePriorities = &one }); } else { assert(false && "unsuitable device"); @@ -148,7 +148,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic auto enabled_features = VkPhysicalDeviceFeatures {}; - auto create_info = VkDeviceCreateInfo { + auto device_create_info = VkDeviceCreateInfo { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -160,10 +160,20 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .ppEnabledExtensionNames = enabled_instance_extensions.data(), .pEnabledFeatures = &enabled_features }; - CHECK(vkCreateDevice(physical_device, &create_info, nullptr, &device)); + CHECK(vkCreateDevice(physical_device, &device_create_info, nullptr, &device)); + vkGetDeviceQueue(device, compute_queue_family, 0, &queue); + + auto cmd_pool_create_info = VkCommandPoolCreateInfo { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .queueFamilyIndex = (uint32_t) compute_queue_family, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT + }; + CHECK(vkCreateCommandPool(device, &cmd_pool_create_info, nullptr, &cmd_pool)); } VulkanPlatform::Device::~Device() { + vkDestroyCommandPool(device, cmd_pool, nullptr); if (device != nullptr) vkDestroyDevice(device, nullptr); } @@ -233,6 +243,17 @@ void* VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { command_unavailable("get_device_ptr"); } +VulkanPlatform::Resource* VulkanPlatform::Device::find_resource_by_id(size_t id) { + size_t i = 0; + for (auto& resource : resources) { + if (resource->id == id) { + return resources[i].get(); + } + i++; + } + return nullptr; +} + void VulkanPlatform::release(DeviceId dev, void *ptr) { if (ptr == nullptr) return; @@ -297,15 +318,88 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size return imported_memory; } +VkCommandBuffer VulkanPlatform::Device::obtain_command_buffer() { + if (spare_cmd_bufs.size() > 0) { + VkCommandBuffer cmd_buf = spare_cmd_bufs.back(); + spare_cmd_bufs.pop_back(); + return cmd_buf; + } + auto cmd_buf_create_info = VkCommandBufferAllocateInfo { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = cmd_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1 + }; + VkCommandBuffer cmd_buf; + vkAllocateCommandBuffers(device, &cmd_buf_create_info, &cmd_buf); + return cmd_buf; +} + +void VulkanPlatform::Device::return_command_buffer(VkCommandBuffer cmd_buf) { + vkResetCommandBuffer(cmd_buf, 0); + spare_cmd_bufs.push_back(cmd_buf); +} + void VulkanPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { } void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { auto& device = usable_devices[dev_dst]; + auto dst_buffer_resource = (Buffer*) device->find_resource_by_id((size_t) dst); + auto dst_buffer = dst_buffer_resource->buffer; + // Import host memory and wrap it in a buffer size_t host_ptr = (size_t)src + offset_src; VkDeviceMemory imported_memory = device->import_host_memory((void*)host_ptr, size); + auto tmp_buffer_create_info = VkBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + VkBuffer tmp_buffer; + vkCreateBuffer(device->device, &tmp_buffer_create_info, nullptr, &tmp_buffer); + vkBindBufferMemory(device->device, tmp_buffer, imported_memory, 0); + + VkCommandBuffer cmd_buf = device->obtain_command_buffer(); + auto begin_command_buffer_info = VkCommandBufferBeginInfo { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }; + vkBeginCommandBuffer(cmd_buf, &begin_command_buffer_info); + VkBufferCopy copy_region { + .srcOffset = 0, + .dstOffset = (VkDeviceSize) offset_dst, + .size = (VkDeviceSize) size, + }; + vkCmdCopyBuffer(cmd_buf, tmp_buffer, dst_buffer, 1, ©_region); + vkEndCommandBuffer(cmd_buf); + auto submit_info = VkSubmitInfo { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = &cmd_buf, + .signalSemaphoreCount = 0, + .pSignalSemaphores = nullptr, + }; + vkQueueSubmit(device->queue, 1, &submit_info, VK_NULL_HANDLE); + vkDeviceWaitIdle(device->device); + device->return_command_buffer(cmd_buf); + + // Cleanup + vkFreeMemory(device->device, imported_memory, nullptr); + vkDestroyBuffer(device->device, tmp_buffer, nullptr); } void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index b4c0349c..f3c0c684 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -65,12 +65,18 @@ class VulkanPlatform : public Platform { std::vector> resources; size_t next_resource_id = 1; // resource id 0 is reserved + VkQueue queue; + VkCommandPool cmd_pool; + std::vector spare_cmd_bufs; Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); + Resource* find_resource_by_id(size_t id); uint32_t find_suitable_memory_type(uint32_t memory_type_bits); VkDeviceMemory import_host_memory(void* ptr, size_t size); + VkCommandBuffer obtain_command_buffer(); + void return_command_buffer(VkCommandBuffer cmd_buf); }; VkInstance instance; From 00168eb00f2b4b1581c5741b5aab704290b437a3 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 14 Apr 2021 12:22:55 +0200 Subject: [PATCH 08/43] load kernel boilerplate --- platforms/artic/runtime.impala | 2 + src/vulkan_platform.cpp | 74 ++++++++++++++++++++++++++++++++-- src/vulkan_platform.h | 15 ++++++- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/platforms/artic/runtime.impala b/platforms/artic/runtime.impala index befef9cb..99187fee 100644 --- a/platforms/artic/runtime.impala +++ b/platforms/artic/runtime.impala @@ -104,6 +104,8 @@ fn @alloc_hsa(dev: i32, size: i64) = alloc(runtime_device(3, dev), size); fn @alloc_hsa_host(dev: i32, size: i64) = alloc_host(runtime_device(3, dev), size); fn @alloc_hsa_unified(dev: i32, size: i64) = alloc_unified(runtime_device(3, dev), size); fn @synchronize_hsa(dev: i32) = runtime_synchronize(runtime_device(3, dev)); +fn @alloc_vulkan(dev: i32, size: i64) = alloc(runtime_device(4, dev), size); +fn @synchronize_vulkan(dev: i32) = runtime_synchronize(runtime_device(4, dev)); fn @copy(src: Buffer, dst: Buffer) = runtime_copy(src.device, src.data, 0, dst.device, dst.data, 0, src.size); fn @copy_offset(src: Buffer, off_src: i64, dst: Buffer, off_dst: i64, size: i64) = runtime_copy(src.device, src.data, off_src, dst.device, dst.data, off_dst, size); diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 83a16a7c..33da7a5e 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -101,7 +101,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(device_properties.apiVersion), VK_VERSION_MINOR(device_properties.apiVersion), VK_VERSION_PATCH(device_properties.apiVersion)); min_imported_host_ptr_alignment = external_memory_host_properties.minImportedHostPointerAlignment; - debug("Min imported host ptr alignment: %", min_imported_host_ptr_alignment); + debug(" Min imported host ptr alignment: %", min_imported_host_ptr_alignment); if (min_imported_host_ptr_alignment == 0xDEADBEEF) error("Device does not report minimum host pointer alignment"); @@ -166,14 +166,15 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic auto cmd_pool_create_info = VkCommandPoolCreateInfo { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .pNext = nullptr, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = (uint32_t) compute_queue_family, - .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT }; CHECK(vkCreateCommandPool(device, &cmd_pool_create_info, nullptr, &cmd_pool)); } VulkanPlatform::Device::~Device() { vkDestroyCommandPool(device, cmd_pool, nullptr); + kernels.clear(); if (device != nullptr) vkDestroyDevice(device, nullptr); } @@ -275,8 +276,69 @@ void VulkanPlatform::release_host(DeviceId dev, void *ptr) { command_unavailable("release_host"); } -void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { +VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { + auto ki = kernels.find(filename); + if (ki == kernels.end()) { + auto [i,b] = kernels.emplace(filename, Kernel(*this)); + Kernel& kernel = i->second; + std::string bin = platform.runtime_->load_file(filename); + auto shader_module_create_info = VkShaderModuleCreateInfo { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = bin.size(), + .pCode = reinterpret_cast(bin.c_str()), + }; + vkCreateShaderModule(device, &shader_module_create_info, nullptr, &kernel.shader_module); + + auto stage = VkPipelineShaderStageCreateInfo { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = kernel.shader_module, + .pName = "kernel_main", + .pSpecializationInfo = nullptr, + }; + + std::vector push_constants { + VkPushConstantRange { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = 128 + } + }; + auto layout_create_info = VkPipelineLayoutCreateInfo { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 0, + .pSetLayouts = nullptr, + .pushConstantRangeCount = (uint32_t) push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }; + vkCreatePipelineLayout(device, &layout_create_info, nullptr, &kernel.layout); + + auto compute_pipeline_create_info = VkComputePipelineCreateInfo { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = stage, + .layout = kernel.layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }; + CHECK(vkCreateComputePipelines(device, nullptr, 1, &compute_pipeline_create_info, nullptr, &kernel.pipeline)); + return &kernel; + } + + return &ki->second; +} + +void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { + auto& device = usable_devices[dev]; + auto kernel = device->load_kernel(launch_params.file_name); } void VulkanPlatform::synchronize(DeviceId dev) { @@ -417,3 +479,9 @@ VulkanPlatform::Resource::~Resource() { VulkanPlatform::Buffer::~Buffer() { vkDestroyBuffer(device.device, buffer, nullptr); } + +VulkanPlatform::Kernel::~Kernel() { + vkDestroyPipeline(device.device, pipeline, nullptr); + vkDestroyPipelineLayout(device.device, layout, nullptr); + vkDestroyShaderModule(device.device, shader_module, nullptr); +} diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index f3c0c684..7880f52d 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -39,7 +39,7 @@ class VulkanPlatform : public Platform { struct Device; struct Resource { - public: + //public: Device& device; size_t id; VkDeviceMemory alloc; @@ -55,6 +55,17 @@ class VulkanPlatform : public Platform { ~Buffer() override; }; + struct Kernel { + Device& device; + + VkShaderModule shader_module; + VkPipelineLayout layout; + VkPipeline pipeline; + + Kernel(Device& device) : device(device) {} + ~Kernel(); + }; + struct Device { VulkanPlatform& platform; VkPhysicalDevice physical_device; @@ -68,6 +79,7 @@ class VulkanPlatform : public Platform { VkQueue queue; VkCommandPool cmd_pool; std::vector spare_cmd_bufs; + std::unordered_map kernels; Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); @@ -77,6 +89,7 @@ class VulkanPlatform : public Platform { VkDeviceMemory import_host_memory(void* ptr, size_t size); VkCommandBuffer obtain_command_buffer(); void return_command_buffer(VkCommandBuffer cmd_buf); + Kernel* load_kernel(const std::string&); }; VkInstance instance; From bd237ab90d7b067761e2cedd3a5d99863d6499b6 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 15 Apr 2021 09:39:46 +0200 Subject: [PATCH 09/43] factor out single-use cmdbuf creation --- src/vulkan_platform.cpp | 73 ++++++++++++++++++++++++----------------- src/vulkan_platform.h | 4 +++ 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 33da7a5e..2dc0feac 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -339,10 +339,17 @@ VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& f void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { auto& device = usable_devices[dev]; auto kernel = device->load_kernel(launch_params.file_name); + + device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, kernel->pipeline); + std::array push_constants {}; + vkCmdPushConstants(cmd_buf, kernel->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128, &push_constants); + vkCmdDispatch(cmd_buf, launch_params.grid[0], launch_params.grid[1], launch_params.grid[2]); + }); } void VulkanPlatform::synchronize(DeviceId dev) { - + // TODO: don't wait for idle everywhere } VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { @@ -403,6 +410,33 @@ void VulkanPlatform::Device::return_command_buffer(VkCommandBuffer cmd_buf) { spare_cmd_bufs.push_back(cmd_buf); } +void VulkanPlatform::Device::execute_command_buffer_oneshot(std::function fn) { + VkCommandBuffer cmd_buf = obtain_command_buffer(); + auto begin_command_buffer_info = VkCommandBufferBeginInfo { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }; + vkBeginCommandBuffer(cmd_buf, &begin_command_buffer_info); + fn(cmd_buf); + vkEndCommandBuffer(cmd_buf); + auto submit_info = VkSubmitInfo { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = &cmd_buf, + .signalSemaphoreCount = 0, + .pSignalSemaphores = nullptr, + }; + vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE); + vkDeviceWaitIdle(device); + return_command_buffer(cmd_buf); +} + void VulkanPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { } @@ -429,35 +463,14 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI vkCreateBuffer(device->device, &tmp_buffer_create_info, nullptr, &tmp_buffer); vkBindBufferMemory(device->device, tmp_buffer, imported_memory, 0); - VkCommandBuffer cmd_buf = device->obtain_command_buffer(); - auto begin_command_buffer_info = VkCommandBufferBeginInfo { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, - }; - vkBeginCommandBuffer(cmd_buf, &begin_command_buffer_info); - VkBufferCopy copy_region { - .srcOffset = 0, - .dstOffset = (VkDeviceSize) offset_dst, - .size = (VkDeviceSize) size, - }; - vkCmdCopyBuffer(cmd_buf, tmp_buffer, dst_buffer, 1, ©_region); - vkEndCommandBuffer(cmd_buf); - auto submit_info = VkSubmitInfo { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = nullptr, - .waitSemaphoreCount = 0, - .pWaitSemaphores = nullptr, - .pWaitDstStageMask = nullptr, - .commandBufferCount = 1, - .pCommandBuffers = &cmd_buf, - .signalSemaphoreCount = 0, - .pSignalSemaphores = nullptr, - }; - vkQueueSubmit(device->queue, 1, &submit_info, VK_NULL_HANDLE); - vkDeviceWaitIdle(device->device); - device->return_command_buffer(cmd_buf); + device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { + VkBufferCopy copy_region { + .srcOffset = 0, + .dstOffset = (VkDeviceSize) offset_dst, + .size = (VkDeviceSize) size, + }; + vkCmdCopyBuffer(cmd_buf, tmp_buffer, dst_buffer, 1, ©_region); + }); // Cleanup vkFreeMemory(device->device, imported_memory, nullptr); diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 7880f52d..1426940e 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -4,6 +4,8 @@ #include "platform.h" #include +#include + class VulkanPlatform : public Platform { public: VulkanPlatform(Runtime* runtime); @@ -90,6 +92,8 @@ class VulkanPlatform : public Platform { VkCommandBuffer obtain_command_buffer(); void return_command_buffer(VkCommandBuffer cmd_buf); Kernel* load_kernel(const std::string&); + + void execute_command_buffer_oneshot(std::function fn); }; VkInstance instance; From 7fa16eed96395a94d447a372241be92c984620c1 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 15 Apr 2021 10:01:52 +0200 Subject: [PATCH 10/43] load ext functions properly --- src/vulkan_platform.cpp | 11 +++++++++-- src/vulkan_platform.h | 11 +++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 2dc0feac..821aa731 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -170,6 +170,11 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .queueFamilyIndex = (uint32_t) compute_queue_family, }; CHECK(vkCreateCommandPool(device, &cmd_pool_create_info, nullptr, &cmd_pool)); + + // Load function pointers +#define f(s) extension_fns.s = (PFN_##s) vkGetDeviceProcAddr(device, #s); + DevicesExtensionsFunctions(f) +#undef f } VulkanPlatform::Device::~Device() { @@ -365,8 +370,10 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size size_t aligned_size = aligned_end - aligned_host_ptr; // Find the corresponding device memory type index - VkMemoryHostPointerPropertiesEXT host_ptr_properties; - vkGetMemoryHostPointerPropertiesEXT(device, handle_type, (void*)aligned_host_ptr, &host_ptr_properties); + VkMemoryHostPointerPropertiesEXT host_ptr_properties { + .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, handle_type, (void*)aligned_host_ptr, &host_ptr_properties); uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits); // Import memory diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 1426940e..7a066584 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -6,6 +6,10 @@ #include +/// Vulkan requires you to manually load certain function pointers, we use a macro to automate the boilerplate +#define DevicesExtensionsFunctions(f) \ + f(vkGetMemoryHostPointerPropertiesEXT) \ + class VulkanPlatform : public Platform { public: VulkanPlatform(Runtime* runtime); @@ -68,6 +72,12 @@ class VulkanPlatform : public Platform { ~Kernel(); }; + struct ExtensionFns { +#define f(s) PFN_##s s; + DevicesExtensionsFunctions(f) +#undef f + }; + struct Device { VulkanPlatform& platform; VkPhysicalDevice physical_device; @@ -82,6 +92,7 @@ class VulkanPlatform : public Platform { VkCommandPool cmd_pool; std::vector spare_cmd_bufs; std::unordered_map kernels; + ExtensionFns extension_fns; Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); From ce5231c81ca01eb0ee2ab37af086efd9a02353a2 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 15 Apr 2021 10:08:24 +0200 Subject: [PATCH 11/43] fix memory import (needs testing still) --- src/vulkan_platform.cpp | 10 +++++----- src/vulkan_platform.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 821aa731..7faa6a97 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -184,7 +184,7 @@ VulkanPlatform::Device::~Device() { vkDestroyDevice(device, nullptr); } -uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits) { +uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits, bool prefer_device_local) { VkPhysicalDeviceMemoryProperties device_memory_properties; vkGetPhysicalDeviceMemoryProperties(physical_device, &device_memory_properties); for (size_t bit = 0; bit < 32; bit++) { @@ -194,7 +194,7 @@ uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_ bool is_device_local = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; if ((memory_type_bits & (1 << bit)) != 0) { - if (is_device_local) + if (!prefer_device_local || is_device_local) return bit; } } @@ -224,7 +224,7 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = nullptr, .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! - .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements.memoryTypeBits), + .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements.memoryTypeBits, true), }; VkDeviceMemory memory; vkAllocateMemory(device->device, &allocation_info, nullptr, &memory); @@ -361,7 +361,7 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size VkExternalMemoryHandleTypeFlagBits handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; // Align stuff - size_t mask = !(min_imported_host_ptr_alignment - 1); + size_t mask = ~(min_imported_host_ptr_alignment - 1); size_t host_ptr = (size_t)ptr; size_t aligned_host_ptr = host_ptr & mask; @@ -374,7 +374,7 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, handle_type, (void*)aligned_host_ptr, &host_ptr_properties); - uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits); + uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, false); // Import memory auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 7a066584..5b8d5704 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -98,7 +98,7 @@ class VulkanPlatform : public Platform { ~Device(); Resource* find_resource_by_id(size_t id); - uint32_t find_suitable_memory_type(uint32_t memory_type_bits); + uint32_t find_suitable_memory_type(uint32_t memory_type_bits, bool prefer_device_local); VkDeviceMemory import_host_memory(void* ptr, size_t size); VkCommandBuffer obtain_command_buffer(); void return_command_buffer(VkCommandBuffer cmd_buf); From f301c0d476e9f210776b7d69e40fbd84ee452cbb Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 15 Apr 2021 10:23:08 +0200 Subject: [PATCH 12/43] cleanup leftover resources on backend shutdown --- src/vulkan_platform.cpp | 4 ++++ src/vulkan_platform.h | 9 +++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 7faa6a97..55e73f69 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -180,6 +180,10 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic VulkanPlatform::Device::~Device() { vkDestroyCommandPool(device, cmd_pool, nullptr); kernels.clear(); + if (!resources.empty()) { + info("Some vulkan resources were not released. Releasing those automatically..."); + resources.clear(); + } if (device != nullptr) vkDestroyDevice(device, nullptr); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 5b8d5704..b78337b9 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -30,14 +30,11 @@ class VulkanPlatform : public Platform { void synchronize(DeviceId dev) override; - void copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, - int64_t size) override; + void copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) override; - void copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, - int64_t size) override; + void copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) override; - void copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, - int64_t size) override; + void copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) override; size_t dev_count() const override { return usable_devices.size(); } std::string name() const override { return "Vulkan"; } From 2aec7e0e4319a6dc666beb5d4f642a76c671d8f1 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 22 Apr 2021 15:58:05 +0200 Subject: [PATCH 13/43] initial support for args/bda --- src/vulkan_platform.cpp | 51 +++++++++++++++++++++++++++++++++++------ src/vulkan_platform.h | 1 + 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 55e73f69..3264fb05 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -109,8 +109,9 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); std::vector available_device_extensions(exts_count); vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, available_device_extensions.data()); - std::vector enabled_instance_extensions { - "VK_EXT_external_memory_host" + std::vector enabled_device_extensions { + "VK_EXT_external_memory_host", + "VK_EXT_buffer_device_address" // not KHR version ! }; uint32_t queue_families_count; @@ -146,19 +147,29 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic assert(false && "unsuitable device"); } - auto enabled_features = VkPhysicalDeviceFeatures {}; + auto bda_features = VkPhysicalDeviceBufferDeviceAddressFeaturesEXT { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT, + .pNext = nullptr, + .bufferDeviceAddress = true, + }; + auto enabled_features = VkPhysicalDeviceFeatures2 { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &bda_features, + .features = { + } + }; auto device_create_info = VkDeviceCreateInfo { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pNext = nullptr, + .pNext = &enabled_features, .flags = 0, .queueCreateInfoCount = (uint32_t) queue_create_infos.size(), .pQueueCreateInfos = queue_create_infos.data(), .enabledLayerCount = 0, .ppEnabledLayerNames = nullptr, - .enabledExtensionCount = (uint32_t) enabled_instance_extensions.size(), - .ppEnabledExtensionNames = enabled_instance_extensions.data(), - .pEnabledFeatures = &enabled_features + .enabledExtensionCount = (uint32_t) enabled_device_extensions.size(), + .ppEnabledExtensionNames = enabled_device_extensions.data(), + .pEnabledFeatures = nullptr // controlled via VkPhysicalDeviceFeatures2 }; CHECK(vkCreateDevice(physical_device, &device_create_info, nullptr, &device)); vkGetDeviceQueue(device, compute_queue_family, 0, &queue); @@ -240,6 +251,16 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { res_buffer->alloc = memory; res_buffer->id = id; res_buffer->buffer = buffer; + + auto bda_info = VkBufferDeviceAddressInfoEXT { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT, + .pNext = nullptr, + .buffer = res_buffer->buffer + }; + VkDeviceAddress bda = vkGetBufferDeviceAddressEXT(device->device, &bda_info); + assert(bda != 0 && "BDA failed"); + res_buffer->bda = bda; + device->resources.push_back(std::move(res_buffer)); return reinterpret_cast(id); @@ -352,6 +373,22 @@ void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_para device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, kernel->pipeline); std::array push_constants {}; + size_t offset = 0; + for (uint32_t arg = 0; arg < launch_params.num_args; arg++) { + if (launch_params.args.types[arg] == KernelArgType::Val) { + assert(launch_params.args.sizes[arg] == 4 && "Preliminary support..."); + memcpy(push_constants.data() + offset, launch_params.args.data[arg], 4); + offset += 4; + } else if (launch_params.args.types[arg] == KernelArgType::Ptr) { + void* buffer = *(void**)launch_params.args.data[arg]; + auto dst_buffer_resource = (Buffer*) device->find_resource_by_id((size_t) buffer); + uint64_t buffer_bda = dst_buffer_resource->bda; + memcpy(push_constants.data() + offset, &buffer_bda, 8); + offset += 8; + } { + assert(false && "no struct support yet"); + } + } vkCmdPushConstants(cmd_buf, kernel->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128, &push_constants); vkCmdDispatch(cmd_buf, launch_params.grid[0], launch_params.grid[1], launch_params.grid[2]); }); diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index b78337b9..d5235693 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -53,6 +53,7 @@ class VulkanPlatform : public Platform { struct Buffer : public Resource { VkBuffer buffer; + uint64_t bda = -1; Buffer(Device& device) : Resource(device) {} ~Buffer() override; From 1966c4d591a04492ac4f38ecafd992e2cef20da4 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 23 Apr 2021 15:16:57 +0200 Subject: [PATCH 14/43] upgrade BDA to KHR variant :/ --- src/vulkan_platform.cpp | 33 +++++++++++++++++++++------------ src/vulkan_platform.h | 1 + 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 3264fb05..b1d4ce3b 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -111,7 +111,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, available_device_extensions.data()); std::vector enabled_device_extensions { "VK_EXT_external_memory_host", - "VK_EXT_buffer_device_address" // not KHR version ! + "VK_KHR_buffer_device_address" }; uint32_t queue_families_count; @@ -147,8 +147,8 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic assert(false && "unsuitable device"); } - auto bda_features = VkPhysicalDeviceBufferDeviceAddressFeaturesEXT { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT, + auto bda_features = VkPhysicalDeviceBufferDeviceAddressFeaturesKHR { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR, .pNext = nullptr, .bufferDeviceAddress = true, }; @@ -156,6 +156,8 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &bda_features, .features = { + .shaderInt64 = true, + .shaderInt16 = true, } }; @@ -224,7 +226,7 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { .pNext = nullptr, .flags = 0, .size = (VkDeviceSize) size, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -235,9 +237,16 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { VkMemoryRequirements memory_requirements; vkGetBufferMemoryRequirements(device->device, buffer, &memory_requirements); + auto allocate_flags = VkMemoryAllocateFlagsInfo { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .pNext = nullptr, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR, + .deviceMask = 0 + }; + auto allocation_info = VkMemoryAllocateInfo { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = nullptr, + .pNext = &allocate_flags, .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements.memoryTypeBits, true), }; @@ -252,12 +261,12 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { res_buffer->id = id; res_buffer->buffer = buffer; - auto bda_info = VkBufferDeviceAddressInfoEXT { - .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT, + auto bda_info = VkBufferDeviceAddressInfoKHR { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, .pNext = nullptr, .buffer = res_buffer->buffer }; - VkDeviceAddress bda = vkGetBufferDeviceAddressEXT(device->device, &bda_info); + VkDeviceAddress bda = device->extension_fns.vkGetBufferDeviceAddressKHR(device->device, &bda_info); assert(bda != 0 && "BDA failed"); res_buffer->bda = bda; @@ -267,7 +276,7 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { } void* VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { - command_unavailable("alloc_host"); + return malloc(size); } void* VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { @@ -303,7 +312,7 @@ void VulkanPlatform::release(DeviceId dev, void *ptr) { } void VulkanPlatform::release_host(DeviceId dev, void *ptr) { - command_unavailable("release_host"); + free(ptr); } VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { @@ -486,7 +495,7 @@ void VulkanPlatform::Device::execute_command_buffer_oneshot(std::function Date: Fri, 23 Apr 2021 19:08:44 +0200 Subject: [PATCH 15/43] making validation happy --- src/anydsl_runtime.h | 3 +- src/anydsl_runtime.hpp | 3 +- src/vulkan_platform.cpp | 64 ++++++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/anydsl_runtime.h b/src/anydsl_runtime.h index 4c988dff..03024b1e 100644 --- a/src/anydsl_runtime.h +++ b/src/anydsl_runtime.h @@ -16,7 +16,8 @@ enum { ANYDSL_HOST = 0, ANYDSL_CUDA = 1, ANYDSL_OPENCL = 2, - ANYDSL_HSA = 3 + ANYDSL_HSA = 3, + ANYDSL_Vulkan = 4 }; AnyDSL_runtime_API void anydsl_info(void); diff --git a/src/anydsl_runtime.hpp b/src/anydsl_runtime.hpp index d2c63fbb..e73d7b8e 100644 --- a/src/anydsl_runtime.hpp +++ b/src/anydsl_runtime.hpp @@ -11,7 +11,8 @@ enum class Platform : int32_t { Host = ANYDSL_HOST, Cuda = ANYDSL_CUDA, OpenCL = ANYDSL_OPENCL, - HSA = ANYDSL_HSA + HSA = ANYDSL_HSA, + Vulkan = ANYDSL_Vulkan }; struct Device { diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index b1d4ce3b..5a4b4e49 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -152,9 +152,15 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .pNext = nullptr, .bufferDeviceAddress = true, }; + auto vk11_features = VkPhysicalDeviceVulkan11Features { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, + .pNext = &bda_features, + .variablePointersStorageBuffer = true, + .variablePointers = true, + }; auto enabled_features = VkPhysicalDeviceFeatures2 { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - .pNext = &bda_features, + .pNext = &vk11_features, .features = { .shaderInt64 = true, .shaderInt16 = true, @@ -407,9 +413,9 @@ void VulkanPlatform::synchronize(DeviceId dev) { // TODO: don't wait for idle everywhere } -VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { - VkExternalMemoryHandleTypeFlagBits handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; +VkExternalMemoryHandleTypeFlagBits imported_host_memory_handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; +VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { // Align stuff size_t mask = ~(min_imported_host_ptr_alignment - 1); size_t host_ptr = (size_t)ptr; @@ -423,21 +429,21 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size VkMemoryHostPointerPropertiesEXT host_ptr_properties { .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; - extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, handle_type, (void*)aligned_host_ptr, &host_ptr_properties); + extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties); uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, false); // Import memory auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, - .pNext = nullptr, - .handleType = handle_type, - .pHostPointer = (void*) aligned_host_ptr, + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .pNext = nullptr, + .handleType = imported_host_memory_handle_type, + .pHostPointer = (void*) aligned_host_ptr, }; auto allocation_info = VkMemoryAllocateInfo { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &import_ptr_info, - .allocationSize = (VkDeviceSize) aligned_size, - .memoryTypeIndex = memory_type + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &import_ptr_info, + .allocationSize = (VkDeviceSize) aligned_size, + .memoryTypeIndex = memory_type }; VkDeviceMemory imported_memory; CHECK(vkAllocateMemory(device, &allocation_info, nullptr, &imported_memory)); @@ -470,24 +476,24 @@ void VulkanPlatform::Device::return_command_buffer(VkCommandBuffer cmd_buf) { void VulkanPlatform::Device::execute_command_buffer_oneshot(std::function fn) { VkCommandBuffer cmd_buf = obtain_command_buffer(); auto begin_command_buffer_info = VkCommandBufferBeginInfo { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, }; vkBeginCommandBuffer(cmd_buf, &begin_command_buffer_info); fn(cmd_buf); vkEndCommandBuffer(cmd_buf); auto submit_info = VkSubmitInfo { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = nullptr, - .waitSemaphoreCount = 0, - .pWaitSemaphores = nullptr, - .pWaitDstStageMask = nullptr, - .commandBufferCount = 1, - .pCommandBuffers = &cmd_buf, - .signalSemaphoreCount = 0, - .pSignalSemaphores = nullptr, + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = &cmd_buf, + .signalSemaphoreCount = 0, + .pSignalSemaphores = nullptr, }; vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE); vkDeviceWaitIdle(device); @@ -506,9 +512,15 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI // Import host memory and wrap it in a buffer size_t host_ptr = (size_t)src + offset_src; VkDeviceMemory imported_memory = device->import_host_memory((void*)host_ptr, size); + auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .pNext = nullptr, + .handleTypes = imported_host_memory_handle_type + }; + auto tmp_buffer_create_info = VkBufferCreateInfo { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, + .pNext = &external_mem_buffer_create_info, .flags = 0, .size = (VkDeviceSize) size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, From e37a2a46b863b010c3baf8b024d4b1113de798ec Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Tue, 27 Apr 2021 16:49:21 +0200 Subject: [PATCH 16/43] derp --- src/vulkan_platform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 5a4b4e49..98223a9b 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -400,7 +400,7 @@ void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_para uint64_t buffer_bda = dst_buffer_resource->bda; memcpy(push_constants.data() + offset, &buffer_bda, 8); offset += 8; - } { + } else { assert(false && "no struct support yet"); } } From 9f326cbcf9fe7b5a8a4b603e5d1b0748768cc7d9 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Tue, 27 Apr 2021 17:37:18 +0200 Subject: [PATCH 17/43] implement copy to host --- src/vulkan_platform.cpp | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 98223a9b..cd423112 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -547,7 +547,45 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI } void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { - command_unavailable("copy_to_host"); + auto& device = usable_devices[dev_src]; + auto src_buffer_resource = (Buffer*) device->find_resource_by_id((size_t) src); + auto src_buffer = src_buffer_resource->buffer; + + // Import host memory and wrap it in a buffer + size_t host_ptr = (size_t)dst + offset_dst; + VkDeviceMemory imported_memory = device->import_host_memory((void*)host_ptr, size); + auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .pNext = nullptr, + .handleTypes = imported_host_memory_handle_type + }; + + auto tmp_buffer_create_info = VkBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = &external_mem_buffer_create_info, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + VkBuffer tmp_buffer; + vkCreateBuffer(device->device, &tmp_buffer_create_info, nullptr, &tmp_buffer); + vkBindBufferMemory(device->device, tmp_buffer, imported_memory, 0); + + device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { + VkBufferCopy copy_region { + .srcOffset = (VkDeviceSize) offset_src, + .dstOffset = 0, + .size = (VkDeviceSize) size, + }; + vkCmdCopyBuffer(cmd_buf, src_buffer, tmp_buffer, 1, ©_region); + }); + + // Cleanup + vkFreeMemory(device->device, imported_memory, nullptr); + vkDestroyBuffer(device->device, tmp_buffer, nullptr); } void register_vulkan_platform(Runtime* runtime) { From 7829ddc4c1b67e790166c2a906a326efe0da1bc3 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 28 Apr 2021 19:11:12 +0200 Subject: [PATCH 18/43] put kernels in unique_ptrs --- src/vulkan_platform.cpp | 35 ++++++++++++++++++----------------- src/vulkan_platform.h | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index cd423112..e69e609e 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -111,7 +111,8 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, available_device_extensions.data()); std::vector enabled_device_extensions { "VK_EXT_external_memory_host", - "VK_KHR_buffer_device_address" + "VK_KHR_buffer_device_address", + "VK_KHR_shader_non_semantic_info" }; uint32_t queue_families_count; @@ -163,7 +164,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .pNext = &vk11_features, .features = { .shaderInt64 = true, - .shaderInt16 = true, + // .shaderInt16 = true, } }; @@ -324,8 +325,8 @@ void VulkanPlatform::release_host(DeviceId dev, void *ptr) { VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { auto ki = kernels.find(filename); if (ki == kernels.end()) { - auto [i,b] = kernels.emplace(filename, Kernel(*this)); - Kernel& kernel = i->second; + auto [i,b] = kernels.emplace(filename, std::make_unique(*this)); + Kernel* kernel = i->second.get(); std::string bin = platform.runtime_->load_file(filename); auto shader_module_create_info = VkShaderModuleCreateInfo { @@ -335,14 +336,14 @@ VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& f .codeSize = bin.size(), .pCode = reinterpret_cast(bin.c_str()), }; - vkCreateShaderModule(device, &shader_module_create_info, nullptr, &kernel.shader_module); + CHECK(vkCreateShaderModule(device, &shader_module_create_info, nullptr, &kernel->shader_module)); auto stage = VkPipelineShaderStageCreateInfo { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = kernel.shader_module, + .module = kernel->shader_module, .pName = "kernel_main", .pSpecializationInfo = nullptr, }; @@ -363,22 +364,22 @@ VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& f .pushConstantRangeCount = (uint32_t) push_constants.size(), .pPushConstantRanges = push_constants.data(), }; - vkCreatePipelineLayout(device, &layout_create_info, nullptr, &kernel.layout); + CHECK(vkCreatePipelineLayout(device, &layout_create_info, nullptr, &kernel-> layout)); auto compute_pipeline_create_info = VkComputePipelineCreateInfo { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage = stage, - .layout = kernel.layout, + .layout = kernel->layout, .basePipelineHandle = VK_NULL_HANDLE, .basePipelineIndex = 0, }; - CHECK(vkCreateComputePipelines(device, nullptr, 1, &compute_pipeline_create_info, nullptr, &kernel.pipeline)); - return &kernel; + CHECK(vkCreateComputePipelines(device, nullptr, 1, &compute_pipeline_create_info, nullptr, &kernel->pipeline)); + return kernel; } - return &ki->second; + return ki->second.get(); } void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { @@ -429,7 +430,7 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size VkMemoryHostPointerPropertiesEXT host_ptr_properties { .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; - extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties); + CHECK(extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties)); uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, false); // Import memory @@ -464,7 +465,7 @@ VkCommandBuffer VulkanPlatform::Device::obtain_command_buffer() { .commandBufferCount = 1 }; VkCommandBuffer cmd_buf; - vkAllocateCommandBuffers(device, &cmd_buf_create_info, &cmd_buf); + CHECK(vkAllocateCommandBuffers(device, &cmd_buf_create_info, &cmd_buf)); return cmd_buf; } @@ -481,9 +482,9 @@ void VulkanPlatform::Device::execute_command_buffer_oneshot(std::function spare_cmd_bufs; - std::unordered_map kernels; + std::unordered_map> kernels; ExtensionFns extension_fns; Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); From b164f496ba443175c3fa06445543d48f9608c159 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 28 Apr 2021 20:37:12 +0200 Subject: [PATCH 19/43] refactored mem management --- src/vulkan_platform.cpp | 113 +++++++++++++++++++++++++++++++--------- src/vulkan_platform.h | 12 ++++- 2 files changed, 99 insertions(+), 26 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index e69e609e..b03c4bc7 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -24,6 +24,14 @@ inline std::vector query_extensions_available() { return exts; } +inline bool is_ext_available(std::vector& ext_props, std::string ext_name) { + for (auto& ext : ext_props) { + if (strcmp(ext.extensionName, ext_name.c_str()) == 0) + return true; + } + return false; +} + VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { auto available_layers = query_layers_available(); auto available_instance_extensions = query_extensions_available(); @@ -109,12 +117,17 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); std::vector available_device_extensions(exts_count); vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, available_device_extensions.data()); + std::vector enabled_device_extensions { - "VK_EXT_external_memory_host", "VK_KHR_buffer_device_address", "VK_KHR_shader_non_semantic_info" }; + if (is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { + enabled_device_extensions.push_back("VK_EXT_external_memory_host"); + can_import_host_memory = true; + } else assert(false); + uint32_t queue_families_count; vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, nullptr); std::vector queue_families(queue_families_count); @@ -208,26 +221,32 @@ VulkanPlatform::Device::~Device() { vkDestroyDevice(device, nullptr); } -uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits, bool prefer_device_local) { +uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits, VulkanPlatform::Device::AllocHeap heap) { VkPhysicalDeviceMemoryProperties device_memory_properties; vkGetPhysicalDeviceMemoryProperties(physical_device, &device_memory_properties); for (size_t bit = 0; bit < 32; bit++) { auto& memory_type = device_memory_properties.memoryTypes[bit]; auto& memory_heap = device_memory_properties.memoryHeaps[memory_type.heapIndex]; + bool is_host_visible = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + bool is_host_coherent = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; bool is_device_local = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; if ((memory_type_bits & (1 << bit)) != 0) { - if (!prefer_device_local || is_device_local) - return bit; + switch (heap) { + case AllocHeap::DEVICE_LOCAL: + if (is_device_local) return bit; + break; + case AllocHeap::HOST_VISIBLE: + if (is_host_visible && is_host_coherent) return bit; + break; + } } } assert(false && "Unable to find a suitable memory type"); } -void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { - auto& device = usable_devices[dev]; - +VulkanPlatform::Buffer* VulkanPlatform::Device::alloc_internal(int64_t size, AllocHeap heap) { auto buffer_create_info = VkBufferCreateInfo { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -239,10 +258,10 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { .pQueueFamilyIndices = nullptr, }; VkBuffer buffer; - vkCreateBuffer(device->device, &buffer_create_info, nullptr, &buffer); + vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer); VkMemoryRequirements memory_requirements; - vkGetBufferMemoryRequirements(device->device, buffer, &memory_requirements); + vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); auto allocate_flags = VkMemoryAllocateFlagsInfo { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, @@ -255,15 +274,15 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = &allocate_flags, .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! - .memoryTypeIndex = device->find_suitable_memory_type(memory_requirements.memoryTypeBits, true), + .memoryTypeIndex = find_suitable_memory_type(memory_requirements.memoryTypeBits, heap), }; VkDeviceMemory memory; - vkAllocateMemory(device->device, &allocation_info, nullptr, &memory); + vkAllocateMemory(device, &allocation_info, nullptr, &memory); - vkBindBufferMemory(device->device, buffer, memory, 0); - size_t id = device->next_resource_id++; + vkBindBufferMemory(device, buffer, memory, 0); + size_t id = next_resource_id++; - std::unique_ptr res_buffer = std::make_unique(*device); + std::unique_ptr res_buffer = std::make_unique(*this); res_buffer->alloc = memory; res_buffer->id = id; res_buffer->buffer = buffer; @@ -273,17 +292,30 @@ void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { .pNext = nullptr, .buffer = res_buffer->buffer }; - VkDeviceAddress bda = device->extension_fns.vkGetBufferDeviceAddressKHR(device->device, &bda_info); + VkDeviceAddress bda = extension_fns.vkGetBufferDeviceAddressKHR(device, &bda_info); assert(bda != 0 && "BDA failed"); res_buffer->bda = bda; - device->resources.push_back(std::move(res_buffer)); + resources.push_back(std::move(res_buffer)); - return reinterpret_cast(id); + return reinterpret_cast(resources.back().get()); +} + +void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { + auto& device = usable_devices[dev]; + auto resource = device->alloc_internal(size, VulkanPlatform::Device::AllocHeap::DEVICE_LOCAL); + return (void*) ((size_t) resource->bda); } void* VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { - return malloc(size); + auto& device = usable_devices[dev]; + if (device->can_import_host_memory) + return malloc(size); + else { + auto id = device->alloc_internal(size, VulkanPlatform::Device::AllocHeap::HOST_VISIBLE); + // TODO map it + assert(false); + } } void* VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { @@ -298,6 +330,31 @@ VulkanPlatform::Resource* VulkanPlatform::Device::find_resource_by_id(size_t id) } i++; } + assert(false && "cannot find resource"); + return nullptr; +} + +VulkanPlatform::Buffer* VulkanPlatform::Device::find_buffer_by_device_address(uint64_t bda) { + size_t i = 0; + for (auto& resource : resources) { + if (auto buffer = dynamic_cast(resource.get()); buffer->bda == bda) { + return buffer; + } + i++; + } + assert(false && "cannot find resource"); + return nullptr; +} + +VulkanPlatform::Buffer* VulkanPlatform::Device::find_buffer_by_host_address(size_t host_address) { + size_t i = 0; + for (auto& resource : resources) { + if (auto buffer = dynamic_cast(resource.get()); buffer->mapped_host_address == host_address) { + return buffer; + } + i++; + } + assert(false && "cannot find resource"); return nullptr; } @@ -306,10 +363,10 @@ void VulkanPlatform::release(DeviceId dev, void *ptr) { return; auto& device = usable_devices[dev]; - size_t id = reinterpret_cast(ptr); + auto bda = reinterpret_cast(ptr); size_t i = 0; for (auto& resource : device->resources) { - if (resource->id == id) { + if (auto buffer = dynamic_cast(resource.get()); buffer->bda == bda) { device->resources.erase(device->resources.begin() + i); return; } @@ -319,7 +376,11 @@ void VulkanPlatform::release(DeviceId dev, void *ptr) { } void VulkanPlatform::release_host(DeviceId dev, void *ptr) { - free(ptr); + auto& device = usable_devices[dev]; + if (device->can_import_host_memory) + free(ptr); + else + release(dev, ptr); } VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { @@ -397,7 +458,7 @@ void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_para offset += 4; } else if (launch_params.args.types[arg] == KernelArgType::Ptr) { void* buffer = *(void**)launch_params.args.data[arg]; - auto dst_buffer_resource = (Buffer*) device->find_resource_by_id((size_t) buffer); + auto dst_buffer_resource = (Buffer*) device->find_buffer_by_device_address((uint64_t) buffer); uint64_t buffer_bda = dst_buffer_resource->bda; memcpy(push_constants.data() + offset, &buffer_bda, 8); offset += 8; @@ -417,6 +478,8 @@ void VulkanPlatform::synchronize(DeviceId dev) { VkExternalMemoryHandleTypeFlagBits imported_host_memory_handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { + assert(can_import_host_memory && "This device does not support importing host memory"); + // Align stuff size_t mask = ~(min_imported_host_ptr_alignment - 1); size_t host_ptr = (size_t)ptr; @@ -431,7 +494,7 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; CHECK(extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties)); - uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, false); + uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, AllocHeap::HOST_VISIBLE); // Import memory auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { @@ -507,7 +570,7 @@ void VulkanPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { auto& device = usable_devices[dev_dst]; - auto dst_buffer_resource = (Buffer*) device->find_resource_by_id((size_t) dst); + auto dst_buffer_resource = device->find_buffer_by_device_address((uint64_t) dst); auto dst_buffer = dst_buffer_resource->buffer; // Import host memory and wrap it in a buffer @@ -549,7 +612,7 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { auto& device = usable_devices[dev_src]; - auto src_buffer_resource = (Buffer*) device->find_resource_by_id((size_t) src); + auto src_buffer_resource = device->find_buffer_by_device_address((uint64_t) src); auto src_buffer = src_buffer_resource->buffer; // Import host memory and wrap it in a buffer diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 46552f6c..e8f25382 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -55,6 +55,7 @@ class VulkanPlatform : public Platform { struct Buffer : public Resource { VkBuffer buffer; uint64_t bda = -1; + size_t mapped_host_address = 0; Buffer(Device& device) : Resource(device) {} ~Buffer() override; @@ -78,12 +79,18 @@ class VulkanPlatform : public Platform { }; struct Device { + enum class AllocHeap { + DEVICE_LOCAL, + HOST_VISIBLE + }; + VulkanPlatform& platform; VkPhysicalDevice physical_device; size_t device_id; VkDevice device = nullptr; size_t min_imported_host_ptr_alignment; + bool can_import_host_memory = false; std::vector> resources; size_t next_resource_id = 1; // resource id 0 is reserved @@ -97,8 +104,11 @@ class VulkanPlatform : public Platform { ~Device(); Resource* find_resource_by_id(size_t id); - uint32_t find_suitable_memory_type(uint32_t memory_type_bits, bool prefer_device_local); + Buffer* find_buffer_by_device_address(uint64_t bda); + Buffer* find_buffer_by_host_address(size_t host_address); + uint32_t find_suitable_memory_type(uint32_t memory_type_bits, AllocHeap); VkDeviceMemory import_host_memory(void* ptr, size_t size); + Buffer* alloc_internal(int64_t, AllocHeap); VkCommandBuffer obtain_command_buffer(); void return_command_buffer(VkCommandBuffer cmd_buf); Kernel* load_kernel(const std::string&); From 5d32e1373fd20fe782b471d9b664b8bd7b125744 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 28 Apr 2021 21:06:41 +0200 Subject: [PATCH 20/43] enable some capabilities for debug printf --- src/vulkan_platform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index b03c4bc7..a79d6789 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -176,6 +176,8 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &vk11_features, .features = { + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, .shaderInt64 = true, // .shaderInt16 = true, } From b33b403b925ba48167ea296192f5b6bdec0db14e Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 29 Apr 2021 12:42:50 +0200 Subject: [PATCH 21/43] working upload/download (using staging bufs) --- src/vulkan_platform.cpp | 169 +++++++++++++++++++++++----------------- src/vulkan_platform.h | 15 ++-- 2 files changed, 107 insertions(+), 77 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index a79d6789..2812f6a6 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -123,10 +123,10 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic "VK_KHR_shader_non_semantic_info" }; - if (is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { + if (false && is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { enabled_device_extensions.push_back("VK_EXT_external_memory_host"); can_import_host_memory = true; - } else assert(false); + } uint32_t queue_families_count; vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, nullptr); @@ -248,16 +248,16 @@ uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_ assert(false && "Unable to find a suitable memory type"); } -VulkanPlatform::Buffer* VulkanPlatform::Device::alloc_internal(int64_t size, AllocHeap heap) { +std::pair VulkanPlatform::Device::allocate_buffer(int64_t size, VkBufferUsageFlags usage_flags, AllocHeap heap) { auto buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = usage_flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, }; VkBuffer buffer; vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer); @@ -266,22 +266,28 @@ VulkanPlatform::Buffer* VulkanPlatform::Device::alloc_internal(int64_t size, All vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); auto allocate_flags = VkMemoryAllocateFlagsInfo { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, - .pNext = nullptr, - .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR, - .deviceMask = 0 + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .pNext = nullptr, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR, + .deviceMask = 0 }; auto allocation_info = VkMemoryAllocateInfo { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &allocate_flags, - .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! - .memoryTypeIndex = find_suitable_memory_type(memory_requirements.memoryTypeBits, heap), + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &allocate_flags, + .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! + .memoryTypeIndex = find_suitable_memory_type(memory_requirements.memoryTypeBits, heap), }; VkDeviceMemory memory; vkAllocateMemory(device, &allocation_info, nullptr, &memory); - vkBindBufferMemory(device, buffer, memory, 0); + + return std::make_pair(buffer, memory); +} + +VulkanPlatform::Buffer* VulkanPlatform::Device::create_buffer_resource(int64_t size, VkBufferUsageFlags usage_flags, AllocHeap heap) { + auto [buffer, memory] = allocate_buffer(size, usage_flags, heap); + size_t id = next_resource_id++; std::unique_ptr res_buffer = std::make_unique(*this); @@ -303,9 +309,16 @@ VulkanPlatform::Buffer* VulkanPlatform::Device::alloc_internal(int64_t size, All return reinterpret_cast(resources.back().get()); } +constexpr VkBufferUsageFlags general_purpose_buffer_flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT + | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR + ; + void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { auto& device = usable_devices[dev]; - auto resource = device->alloc_internal(size, VulkanPlatform::Device::AllocHeap::DEVICE_LOCAL); + auto resource = device->create_buffer_resource(size, general_purpose_buffer_flags, VulkanPlatform::Device::AllocHeap::DEVICE_LOCAL); return (void*) ((size_t) resource->bda); } @@ -314,7 +327,7 @@ void* VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { if (device->can_import_host_memory) return malloc(size); else { - auto id = device->alloc_internal(size, VulkanPlatform::Device::AllocHeap::HOST_VISIBLE); + auto id = device->create_buffer_resource(size, general_purpose_buffer_flags, VulkanPlatform::Device::AllocHeap::HOST_VISIBLE); // TODO map it assert(false); } @@ -516,6 +529,29 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size return imported_memory; } +std::pair VulkanPlatform::Device::import_host_memory_as_buffer(void* ptr, size_t size, VkBufferUsageFlags usage_flags) { + VkDeviceMemory imported_memory = import_host_memory(ptr, size); + auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .pNext = nullptr, + .handleTypes = imported_host_memory_handle_type + }; + auto tmp_buffer_create_info = VkBufferCreateInfo { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = &external_mem_buffer_create_info, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = usage_flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + VkBuffer buffer; + vkCreateBuffer(device, &tmp_buffer_create_info, nullptr, &buffer); + vkBindBufferMemory(device, buffer, imported_memory, 0); + return std::make_pair(buffer, imported_memory); +} + VkCommandBuffer VulkanPlatform::Device::obtain_command_buffer() { if (spare_cmd_bufs.size() > 0) { VkCommandBuffer cmd_buf = spare_cmd_bufs.back(); @@ -575,40 +611,33 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI auto dst_buffer_resource = device->find_buffer_by_device_address((uint64_t) dst); auto dst_buffer = dst_buffer_resource->buffer; - // Import host memory and wrap it in a buffer - size_t host_ptr = (size_t)src + offset_src; - VkDeviceMemory imported_memory = device->import_host_memory((void*)host_ptr, size); - auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, - .pNext = nullptr, - .handleTypes = imported_host_memory_handle_type - }; - - auto tmp_buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = &external_mem_buffer_create_info, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; VkBuffer tmp_buffer; - vkCreateBuffer(device->device, &tmp_buffer_create_info, nullptr, &tmp_buffer); - vkBindBufferMemory(device->device, tmp_buffer, imported_memory, 0); + VkDeviceMemory memory; + + void* host_ptr = (void*)((size_t)src + offset_src); + if (device->can_import_host_memory) { + // Import host memory and wrap it in a buffer + std::tie(tmp_buffer, memory) = device->import_host_memory_as_buffer(host_ptr, size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + } else { + std::tie(tmp_buffer, memory) = device->allocate_buffer(size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, Device::AllocHeap::HOST_VISIBLE); + void* mapped = nullptr; + CHECK(vkMapMemory(device->device, memory, 0, size, 0, &mapped)); + assert(mapped != nullptr); + memcpy(mapped, host_ptr, size); + vkUnmapMemory(device->device, memory); + } device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { VkBufferCopy copy_region { - .srcOffset = 0, - .dstOffset = (VkDeviceSize) offset_dst, - .size = (VkDeviceSize) size, + .srcOffset = 0, + .dstOffset = (VkDeviceSize) offset_dst, + .size = (VkDeviceSize) size, }; vkCmdCopyBuffer(cmd_buf, tmp_buffer, dst_buffer, 1, ©_region); }); // Cleanup - vkFreeMemory(device->device, imported_memory, nullptr); + vkFreeMemory(device->device, memory, nullptr); vkDestroyBuffer(device->device, tmp_buffer, nullptr); } @@ -617,40 +646,36 @@ void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t off auto src_buffer_resource = device->find_buffer_by_device_address((uint64_t) src); auto src_buffer = src_buffer_resource->buffer; - // Import host memory and wrap it in a buffer - size_t host_ptr = (size_t)dst + offset_dst; - VkDeviceMemory imported_memory = device->import_host_memory((void*)host_ptr, size); - auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, - .pNext = nullptr, - .handleTypes = imported_host_memory_handle_type - }; - - auto tmp_buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = &external_mem_buffer_create_info, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; VkBuffer tmp_buffer; - vkCreateBuffer(device->device, &tmp_buffer_create_info, nullptr, &tmp_buffer); - vkBindBufferMemory(device->device, tmp_buffer, imported_memory, 0); + VkDeviceMemory memory; + + void* host_ptr = (void*)((size_t)dst + offset_dst); + if (device->can_import_host_memory) { + // Import host memory and wrap it in a buffer + std::tie(tmp_buffer, memory) = device->import_host_memory_as_buffer(host_ptr, size, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + } else { + std::tie(tmp_buffer, memory) = device->allocate_buffer(size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, Device::AllocHeap::HOST_VISIBLE); + } device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { VkBufferCopy copy_region { - .srcOffset = (VkDeviceSize) offset_src, - .dstOffset = 0, - .size = (VkDeviceSize) size, + .srcOffset = (VkDeviceSize) offset_src, + .dstOffset = 0, + .size = (VkDeviceSize) size, }; vkCmdCopyBuffer(cmd_buf, src_buffer, tmp_buffer, 1, ©_region); }); + if (!device->can_import_host_memory) { + void* mapped = nullptr; + CHECK(vkMapMemory(device->device, memory, 0, size, 0, &mapped)); + assert(mapped != nullptr); + memcpy(host_ptr, mapped, size); + vkUnmapMemory(device->device, memory); + } + // Cleanup - vkFreeMemory(device->device, imported_memory, nullptr); + vkFreeMemory(device->device, memory, nullptr); vkDestroyBuffer(device->device, tmp_buffer, nullptr); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index e8f25382..5763f6b1 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -103,17 +103,22 @@ class VulkanPlatform : public Platform { Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); + uint32_t find_suitable_memory_type(uint32_t memory_type_bits, AllocHeap); + + VkDeviceMemory import_host_memory(void* ptr, size_t size); + std::pair import_host_memory_as_buffer(void* ptr, size_t size, VkBufferUsageFlags usage_flags); + std::pair allocate_buffer(int64_t, VkBufferUsageFlags usage_flags, AllocHeap); + Resource* find_resource_by_id(size_t id); + Buffer* create_buffer_resource(int64_t, VkBufferUsageFlags usage_flags, AllocHeap); Buffer* find_buffer_by_device_address(uint64_t bda); Buffer* find_buffer_by_host_address(size_t host_address); - uint32_t find_suitable_memory_type(uint32_t memory_type_bits, AllocHeap); - VkDeviceMemory import_host_memory(void* ptr, size_t size); - Buffer* alloc_internal(int64_t, AllocHeap); + VkCommandBuffer obtain_command_buffer(); void return_command_buffer(VkCommandBuffer cmd_buf); - Kernel* load_kernel(const std::string&); - void execute_command_buffer_oneshot(std::function fn); + + Kernel* load_kernel(const std::string&); }; VkInstance instance; From 80bb4c330fc623b55f14803a2eb42458f8b7cc8d Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 29 Apr 2021 16:16:54 +0200 Subject: [PATCH 22/43] fixed importing memory --- src/vulkan_platform.cpp | 15 ++++++++++----- src/vulkan_platform.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 2812f6a6..ae91863f 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -123,7 +123,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic "VK_KHR_shader_non_semantic_info" }; - if (false && is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { + if (is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { enabled_device_extensions.push_back("VK_EXT_external_memory_host"); can_import_host_memory = true; } @@ -492,7 +492,7 @@ void VulkanPlatform::synchronize(DeviceId dev) { VkExternalMemoryHandleTypeFlagBits imported_host_memory_handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; -VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { +std::pair VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { assert(can_import_host_memory && "This device does not support importing host memory"); // Align stuff @@ -504,6 +504,9 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size size_t aligned_end = ((end + min_imported_host_ptr_alignment - 1) / min_imported_host_ptr_alignment) * min_imported_host_ptr_alignment; size_t aligned_size = aligned_end - aligned_host_ptr; + // where the memory we wanted to import will actually start + size_t offset = host_ptr - aligned_host_ptr; + // Find the corresponding device memory type index VkMemoryHostPointerPropertiesEXT host_ptr_properties { .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, @@ -526,11 +529,13 @@ VkDeviceMemory VulkanPlatform::Device::import_host_memory(void *ptr, size_t size }; VkDeviceMemory imported_memory; CHECK(vkAllocateMemory(device, &allocation_info, nullptr, &imported_memory)); - return imported_memory; + return std::make_pair(imported_memory, offset); } std::pair VulkanPlatform::Device::import_host_memory_as_buffer(void* ptr, size_t size, VkBufferUsageFlags usage_flags) { - VkDeviceMemory imported_memory = import_host_memory(ptr, size); + VkDeviceMemory imported_memory; + size_t imported_offset; + std::tie(imported_memory, imported_offset) = import_host_memory(ptr, size); auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -548,7 +553,7 @@ std::pair VulkanPlatform::Device::import_host_memory_a }; VkBuffer buffer; vkCreateBuffer(device, &tmp_buffer_create_info, nullptr, &buffer); - vkBindBufferMemory(device, buffer, imported_memory, 0); + vkBindBufferMemory(device, buffer, imported_memory, imported_offset); return std::make_pair(buffer, imported_memory); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 5763f6b1..7b8a7b90 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -105,7 +105,7 @@ class VulkanPlatform : public Platform { uint32_t find_suitable_memory_type(uint32_t memory_type_bits, AllocHeap); - VkDeviceMemory import_host_memory(void* ptr, size_t size); + std::pair import_host_memory(void* ptr, size_t size); std::pair import_host_memory_as_buffer(void* ptr, size_t size, VkBufferUsageFlags usage_flags); std::pair allocate_buffer(int64_t, VkBufferUsageFlags usage_flags, AllocHeap); From 79b9f68c4bceb8cc1208f869d3d46cdfcb934af6 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Mon, 3 May 2021 16:26:08 +0200 Subject: [PATCH 23/43] cleanup --- src/vulkan_platform.cpp | 57 +++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index ae91863f..26695283 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -123,6 +123,7 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic "VK_KHR_shader_non_semantic_info" }; + // Use this to import host memory as GPU-visible memory, otherwise use a fallback path that copies when uploading/downloading if (is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { enabled_device_extensions.push_back("VK_EXT_external_memory_host"); can_import_host_memory = true; @@ -250,14 +251,14 @@ uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_ std::pair VulkanPlatform::Device::allocate_buffer(int64_t size, VkBufferUsageFlags usage_flags, AllocHeap heap) { auto buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = usage_flags, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = usage_flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, }; VkBuffer buffer; vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer); @@ -266,17 +267,17 @@ std::pair VulkanPlatform::Device::allocate_buffer(int6 vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); auto allocate_flags = VkMemoryAllocateFlagsInfo { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, - .pNext = nullptr, - .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR, - .deviceMask = 0 + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .pNext = nullptr, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR, + .deviceMask = 0 }; auto allocation_info = VkMemoryAllocateInfo { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &allocate_flags, - .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! - .memoryTypeIndex = find_suitable_memory_type(memory_requirements.memoryTypeBits, heap), + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &allocate_flags, + .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! + .memoryTypeIndex = find_suitable_memory_type(memory_requirements.memoryTypeBits, heap), }; VkDeviceMemory memory; vkAllocateMemory(device, &allocation_info, nullptr, &memory); @@ -301,7 +302,7 @@ VulkanPlatform::Buffer* VulkanPlatform::Device::create_buffer_resource(int64_t s .buffer = res_buffer->buffer }; VkDeviceAddress bda = extension_fns.vkGetBufferDeviceAddressKHR(device, &bda_info); - assert(bda != 0 && "BDA failed"); + assert(bda != 0 && "vkGetBufferDeviceAddress failed"); res_buffer->bda = bda; resources.push_back(std::move(res_buffer)); @@ -537,19 +538,19 @@ std::pair VulkanPlatform::Device::import_host_memory_a size_t imported_offset; std::tie(imported_memory, imported_offset) = import_host_memory(ptr, size); auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, - .pNext = nullptr, - .handleTypes = imported_host_memory_handle_type + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .pNext = nullptr, + .handleTypes = imported_host_memory_handle_type }; auto tmp_buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = &external_mem_buffer_create_info, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = usage_flags, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = &external_mem_buffer_create_info, + .flags = 0, + .size = (VkDeviceSize) size, + .usage = usage_flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, }; VkBuffer buffer; vkCreateBuffer(device, &tmp_buffer_create_info, nullptr, &buffer); From 146fb63411a8469fb061256890b1eae8c821a925 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 5 May 2021 10:42:47 +0200 Subject: [PATCH 24/43] draft for vk intrinsics bindings --- platforms/artic/intrinsics_vulkan.impala | 155 +++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 platforms/artic/intrinsics_vulkan.impala diff --git a/platforms/artic/intrinsics_vulkan.impala b/platforms/artic/intrinsics_vulkan.impala new file mode 100644 index 00000000..44a6d75a --- /dev/null +++ b/platforms/artic/intrinsics_vulkan.impala @@ -0,0 +1,155 @@ +// no declarations are emitted for "device" functions +#[import(cc = "device", name = "barrier")] fn vulkan_barrier(u32) -> (); +#[import(cc = "device", name = "exp")] fn vulkan_expf(f32) -> f32; +#[import(cc = "device", name = "exp2")] fn vulkan_exp2f(f32) -> f32; +#[import(cc = "device", name = "log")] fn vulkan_logf(f32) -> f32; +#[import(cc = "device", name = "log2")] fn vulkan_log2f(f32) -> f32; +#[import(cc = "device", name = "pow")] fn vulkan_powf(f32, f32) -> f32; +#[import(cc = "device", name = "rsqrt")] fn vulkan_rsqrtf(f32) -> f32; +#[import(cc = "device", name = "sqrt")] fn vulkan_sqrtf(f32) -> f32; +#[import(cc = "device", name = "fabs")] fn vulkan_fabsf(f32) -> f32; +#[import(cc = "device", name = "sin")] fn vulkan_sinf(f32) -> f32; +#[import(cc = "device", name = "cos")] fn vulkan_cosf(f32) -> f32; +#[import(cc = "device", name = "tan")] fn vulkan_tanf(f32) -> f32; +#[import(cc = "device", name = "asin")] fn vulkan_asinf(f32) -> f32; +#[import(cc = "device", name = "acos")] fn vulkan_acosf(f32) -> f32; +#[import(cc = "device", name = "atan")] fn vulkan_atanf(f32) -> f32; +#[import(cc = "device", name = "erf")] fn vulkan_erff(f32) -> f32; +#[import(cc = "device", name = "atan2")] fn vulkan_atan2f(f32, f32) -> f32; +#[import(cc = "device", name = "fmod")] fn vulkan_fmodf(f32, f32) -> f32; +#[import(cc = "device", name = "floor")] fn vulkan_floorf(f32) -> f32; +#[import(cc = "device", name = "isinf")] fn vulkan_isinff(f32) -> i32; +#[import(cc = "device", name = "isnan")] fn vulkan_isnanf(f32) -> i32; +#[import(cc = "device", name = "isfinite")] fn vulkan_isfinitef(f32) -> i32; +#[import(cc = "device", name = "fma")] fn vulkan_fmaf(f32, f32, f32) -> f32; +#[import(cc = "device", name = "mad")] fn vulkan_madf(f32, f32, f32) -> f32; +#[import(cc = "device", name = "copysign")] fn vulkan_copysignf(f32, f32) -> f32; +#[import(cc = "device", name = "exp")] fn vulkan_exp(f64) -> f64; +#[import(cc = "device", name = "exp2")] fn vulkan_exp2(f64) -> f64; +#[import(cc = "device", name = "log")] fn vulkan_log(f64) -> f64; +#[import(cc = "device", name = "log2")] fn vulkan_log2(f64) -> f64; +#[import(cc = "device", name = "pow")] fn vulkan_pow(f64, f64) -> f64; +#[import(cc = "device", name = "rsqrt")] fn vulkan_rsqrt(f64) -> f64; +#[import(cc = "device", name = "sqrt")] fn vulkan_sqrt(f64) -> f64; +#[import(cc = "device", name = "fabs")] fn vulkan_fabs(f64) -> f64; +#[import(cc = "device", name = "sin")] fn vulkan_sin(f64) -> f64; +#[import(cc = "device", name = "cos")] fn vulkan_cos(f64) -> f64; +#[import(cc = "device", name = "tan")] fn vulkan_tan(f64) -> f64; +#[import(cc = "device", name = "asin")] fn vulkan_asin(f64) -> f64; +#[import(cc = "device", name = "acos")] fn vulkan_acos(f64) -> f64; +#[import(cc = "device", name = "atan")] fn vulkan_atan(f64) -> f64; +#[import(cc = "device", name = "erf")] fn vulkan_erf(f64) -> f64; +#[import(cc = "device", name = "atan2")] fn vulkan_atan2(f64, f64) -> f64; +#[import(cc = "device", name = "fmod")] fn vulkan_fmod(f64, f64) -> f64; +#[import(cc = "device", name = "floor")] fn vulkan_floor(f64) -> f64; +#[import(cc = "device", name = "isinf")] fn vulkan_isinf(f64) -> i32; +#[import(cc = "device", name = "isnan")] fn vulkan_isnan(f64) -> i32; +#[import(cc = "device", name = "isfinite")] fn vulkan_isfinite(f64) -> i32; +#[import(cc = "device", name = "fma")] fn vulkan_fma(f64, f64, f64) -> f64; +#[import(cc = "device", name = "mad")] fn vulkan_mad(f64, f64, f64) -> f64; +#[import(cc = "device", name = "copysign")] fn vulkan_copysign(f64, f64) -> f64; +#[import(cc = "device", name = "fmin")] fn vulkan_fminf(f32, f32) -> f32; +#[import(cc = "device", name = "fmax")] fn vulkan_fmaxf(f32, f32) -> f32; +#[import(cc = "device", name = "fmin")] fn vulkan_fmin(f64, f64) -> f64; +#[import(cc = "device", name = "fmax")] fn vulkan_fmax(f64, f64) -> f64; +#[import(cc = "device", name = "min")] fn vulkan_min(i32, i32) -> i32; +#[import(cc = "device", name = "max")] fn vulkan_max(i32, i32) -> i32; +#[import(cc = "device", name = "atomic_add")] fn vulkan_atomic_add_global(&mut addrspace(1)i32, i32) -> i32; +#[import(cc = "device", name = "atomic_add")] fn vulkan_atomic_add_shared(&mut addrspace(3)i32, i32) -> i32; +#[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_global(&mut addrspace(1)i32, i32) -> i32; +#[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32; +#[import(cc = "device", name = "get_work_dim")] fn vulkan_get_work_dim() -> u32; +#[import(cc = "device", name = "get_global_size")] fn vulkan_get_global_size(u32) -> u64; +#[import(cc = "device", name = "get_global_id")] fn vulkan_get_global_id(u32) -> u64; +#[import(cc = "device", name = "get_local_size")] fn vulkan_get_local_size(u32) -> u64; +#[import(cc = "device", name = "get_local_id")] fn vulkan_get_local_id(u32) -> u64; +#[import(cc = "device", name = "get_num_groups")] fn vulkan_get_num_groups(u32) -> u64; +#[import(cc = "device", name = "get_group_id")] fn vulkan_get_group_id(u32) -> u64; +#[import(cc = "device", name = "get_global_offset")] fn vulkan_get_global_offset(u32) -> u64; + +#[import(cc = "device", name = "spirv.nonsemantic.printf")] fn shader_printf[T](T) -> (); + +fn @vulkan_accelerator(dev: i32) = Accelerator { + exec = @|body| |grid, block| { + let work_item = WorkItem { + tidx = @|| vulkan_get_local_id(0) as i32, + tidy = @|| vulkan_get_local_id(1) as i32, + tidz = @|| vulkan_get_local_id(2) as i32, + bidx = @|| vulkan_get_group_id(0) as i32, + bidy = @|| vulkan_get_group_id(1) as i32, + bidz = @|| vulkan_get_group_id(2) as i32, + gidx = @|| vulkan_get_global_id(0) as i32, + gidy = @|| vulkan_get_global_id(1) as i32, + gidz = @|| vulkan_get_global_id(2) as i32, + bdimx = @|| vulkan_get_local_size(0) as i32, + bdimy = @|| vulkan_get_local_size(1) as i32, + bdimz = @|| vulkan_get_local_size(2) as i32, + gdimx = @|| vulkan_get_global_size(0) as i32, + gdimy = @|| vulkan_get_global_size(1) as i32, + gdimz = @|| vulkan_get_global_size(2) as i32, + nblkx = @|| vulkan_get_num_groups(0) as i32, + nblky = @|| vulkan_get_num_groups(1) as i32, + nblkz = @|| vulkan_get_num_groups(2) as i32 + }; + spirv(dev, grid, block, || @body(work_item)) + }, + sync = @|| synchronize_vulkan(dev), + alloc = @|size| alloc_vulkan(dev, size), + alloc_unified = @|size| alloc_opencl_unified(dev, size), + barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE), +}; + +static vk_intrinsics = Intrinsics { + expf = vulkan_expf, + exp2f = vulkan_exp2f, + logf = vulkan_logf, + log2f = vulkan_log2f, + powf = vulkan_powf, + rsqrtf = vulkan_rsqrtf, + sqrtf = vulkan_sqrtf, + fabsf = vulkan_fabsf, + sinf = vulkan_sinf, + cosf = vulkan_cosf, + tanf = vulkan_tanf, + asinf = vulkan_asinf, + acosf = vulkan_acosf, + atanf = vulkan_atanf, + erff = vulkan_erff, + atan2f = vulkan_atan2f, + copysignf = vulkan_copysignf, + fmaf = vulkan_fmaf, + fmaxf = vulkan_fmaxf, + fminf = vulkan_fminf, + fmodf = vulkan_fmodf, + floorf = vulkan_floorf, + isinff = vulkan_isinff, + isnanf = vulkan_isnanf, + isfinitef = vulkan_isfinitef, + exp = vulkan_exp, + exp2 = vulkan_exp2, + log = vulkan_log, + log2 = vulkan_log2, + pow = vulkan_pow, + rsqrt = vulkan_rsqrt, + sqrt = vulkan_sqrt, + fabs = vulkan_fabs, + sin = vulkan_sin, + cos = vulkan_cos, + tan = vulkan_tan, + asin = vulkan_asin, + acos = vulkan_acos, + atan = vulkan_atan, + erf = vulkan_erf, + atan2 = vulkan_atan2, + copysign = vulkan_copysign, + fma = vulkan_fma, + fmax = vulkan_fmax, + fmin = vulkan_fmin, + fmod = vulkan_fmod, + floor = vulkan_floor, + isinf = vulkan_isinf, + isnan = vulkan_isnan, + isfinite = vulkan_isfinite, + min = vulkan_min, + max = vulkan_max, +}; From 6625cd7b80e5ad7432bb7dbcd854817a979f6776 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 5 May 2021 19:23:29 +0200 Subject: [PATCH 25/43] fiddling --- platforms/artic/intrinsics_vulkan.impala | 50 ++++++++++++------------ src/vulkan_platform.cpp | 2 +- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/platforms/artic/intrinsics_vulkan.impala b/platforms/artic/intrinsics_vulkan.impala index 44a6d75a..7687da2a 100644 --- a/platforms/artic/intrinsics_vulkan.impala +++ b/platforms/artic/intrinsics_vulkan.impala @@ -59,37 +59,37 @@ #[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_global(&mut addrspace(1)i32, i32) -> i32; #[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32; #[import(cc = "device", name = "get_work_dim")] fn vulkan_get_work_dim() -> u32; -#[import(cc = "device", name = "get_global_size")] fn vulkan_get_global_size(u32) -> u64; -#[import(cc = "device", name = "get_global_id")] fn vulkan_get_global_id(u32) -> u64; -#[import(cc = "device", name = "get_local_size")] fn vulkan_get_local_size(u32) -> u64; -#[import(cc = "device", name = "get_local_id")] fn vulkan_get_local_id(u32) -> u64; -#[import(cc = "device", name = "get_num_groups")] fn vulkan_get_num_groups(u32) -> u64; -#[import(cc = "device", name = "get_group_id")] fn vulkan_get_group_id(u32) -> u64; -#[import(cc = "device", name = "get_global_offset")] fn vulkan_get_global_offset(u32) -> u64; +#[import(cc = "device", name = "get_global_size")] fn vulkan_get_global_size(u32) -> i32; +#[import(cc = "device", name = "get_global_id")] fn vulkan_get_global_id(u32) -> i32; +#[import(cc = "device", name = "get_local_size")] fn vulkan_get_local_size(u32) -> i32; +#[import(cc = "device", name = "get_local_id")] fn vulkan_get_local_id(u32) -> i32; +#[import(cc = "device", name = "get_num_groups")] fn vulkan_get_num_groups(u32) -> i32; +#[import(cc = "device", name = "get_group_id")] fn vulkan_get_group_id(u32) -> i32; +#[import(cc = "device", name = "get_global_offset")] fn vulkan_get_global_offset(u32) -> i32; #[import(cc = "device", name = "spirv.nonsemantic.printf")] fn shader_printf[T](T) -> (); fn @vulkan_accelerator(dev: i32) = Accelerator { exec = @|body| |grid, block| { let work_item = WorkItem { - tidx = @|| vulkan_get_local_id(0) as i32, - tidy = @|| vulkan_get_local_id(1) as i32, - tidz = @|| vulkan_get_local_id(2) as i32, - bidx = @|| vulkan_get_group_id(0) as i32, - bidy = @|| vulkan_get_group_id(1) as i32, - bidz = @|| vulkan_get_group_id(2) as i32, - gidx = @|| vulkan_get_global_id(0) as i32, - gidy = @|| vulkan_get_global_id(1) as i32, - gidz = @|| vulkan_get_global_id(2) as i32, - bdimx = @|| vulkan_get_local_size(0) as i32, - bdimy = @|| vulkan_get_local_size(1) as i32, - bdimz = @|| vulkan_get_local_size(2) as i32, - gdimx = @|| vulkan_get_global_size(0) as i32, - gdimy = @|| vulkan_get_global_size(1) as i32, - gdimz = @|| vulkan_get_global_size(2) as i32, - nblkx = @|| vulkan_get_num_groups(0) as i32, - nblky = @|| vulkan_get_num_groups(1) as i32, - nblkz = @|| vulkan_get_num_groups(2) as i32 + tidx = @|| vulkan_get_local_id(0), + tidy = @|| vulkan_get_local_id(1), + tidz = @|| vulkan_get_local_id(2), + bidx = @|| vulkan_get_group_id(0), + bidy = @|| vulkan_get_group_id(1), + bidz = @|| vulkan_get_group_id(2), + gidx = @|| vulkan_get_global_id(0), + gidy = @|| vulkan_get_global_id(1), + gidz = @|| vulkan_get_global_id(2), + bdimx = @|| vulkan_get_local_size(0), + bdimy = @|| vulkan_get_local_size(1), + bdimz = @|| vulkan_get_local_size(2), + gdimx = @|| vulkan_get_global_size(0), + gdimy = @|| vulkan_get_global_size(1), + gdimz = @|| vulkan_get_global_size(2), + nblkx = @|| vulkan_get_num_groups(0), + nblky = @|| vulkan_get_num_groups(1), + nblkz = @|| vulkan_get_num_groups(2) }; spirv(dev, grid, block, || @body(work_item)) }, diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 26695283..584384a2 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -483,7 +483,7 @@ void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_para } } vkCmdPushConstants(cmd_buf, kernel->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128, &push_constants); - vkCmdDispatch(cmd_buf, launch_params.grid[0], launch_params.grid[1], launch_params.grid[2]); + vkCmdDispatch(cmd_buf, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2]); }); } From d4efe54617c1b10b2ab974370d98949eb9836b5d Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 6 May 2021 14:14:35 +0200 Subject: [PATCH 26/43] cute hack arround certain invocation id intrinsics --- platforms/artic/intrinsics_vulkan.impala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/platforms/artic/intrinsics_vulkan.impala b/platforms/artic/intrinsics_vulkan.impala index 7687da2a..5cb339a3 100644 --- a/platforms/artic/intrinsics_vulkan.impala +++ b/platforms/artic/intrinsics_vulkan.impala @@ -59,13 +59,15 @@ #[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_global(&mut addrspace(1)i32, i32) -> i32; #[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32; #[import(cc = "device", name = "get_work_dim")] fn vulkan_get_work_dim() -> u32; -#[import(cc = "device", name = "get_global_size")] fn vulkan_get_global_size(u32) -> i32; +//#[import(cc = "device", name = "get_global_size")] fn vulkan_get_global_size(u32) -> i32; +fn @vulkan_get_global_size(dim: u32) -> i32 = vulkan_get_local_size(dim) * vulkan_get_num_groups(dim); #[import(cc = "device", name = "get_global_id")] fn vulkan_get_global_id(u32) -> i32; #[import(cc = "device", name = "get_local_size")] fn vulkan_get_local_size(u32) -> i32; #[import(cc = "device", name = "get_local_id")] fn vulkan_get_local_id(u32) -> i32; #[import(cc = "device", name = "get_num_groups")] fn vulkan_get_num_groups(u32) -> i32; #[import(cc = "device", name = "get_group_id")] fn vulkan_get_group_id(u32) -> i32; -#[import(cc = "device", name = "get_global_offset")] fn vulkan_get_global_offset(u32) -> i32; +//#[import(cc = "device", name = "get_global_offset")] fn vulkan_get_global_offset(u32) -> i32; +fn @vulkan_get_global_offset(dim: u32) -> i32 = vulkan_get_group_id(dim) * vulkan_get_local_size(dim); #[import(cc = "device", name = "spirv.nonsemantic.printf")] fn shader_printf[T](T) -> (); From ca0456b792896407ad8f8f6fe9d3d09e4811826f Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 1 Dec 2022 11:50:22 +0100 Subject: [PATCH 27/43] Handle lack of CUDA devices gracefully --- src/cuda_platform.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cuda_platform.cpp b/src/cuda_platform.cpp index e02b78b3..5021b94c 100644 --- a/src/cuda_platform.cpp +++ b/src/cuda_platform.cpp @@ -73,6 +73,10 @@ CudaPlatform::CudaPlatform(Runtime* runtime) #endif CUresult err = cuInit(0); + if (err == CUDA_ERROR_NO_DEVICE) { + info("CUDA backend did not initialise because no devices were found (CUDA_ERROR_NO_DEVICE)."); + return; + } CHECK_CUDA(err, "cuInit()"); err = cuDeviceGetCount(&device_count); From e6b895ac47aa17f205524b1a1b3678681dc3db09 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 1 Dec 2022 12:57:03 +0100 Subject: [PATCH 28/43] added stub for shady runtime --- src/CMakeLists.txt | 11 +++++++++++ src/anydsl_runtime.cpp | 1 + src/anydsl_runtime_config.h.in | 1 + src/platform.h | 1 + src/runtime.cpp | 3 +++ src/shady_platform.cpp | 27 +++++++++++++++++++++++++++ src/shady_platform.h | 31 +++++++++++++++++++++++++++++++ 7 files changed, 75 insertions(+) create mode 100644 src/shady_platform.cpp create mode 100644 src/shady_platform.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c6e1cf32..7a3606f8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -128,6 +128,17 @@ if(hsa-runtime64_FOUND) endif() set(AnyDSL_runtime_HAS_HSA_SUPPORT ${hsa-runtime64_FOUND} CACHE INTERNAL "enables HSA support") +# look for shady +find_package(shady) +if (shady_FOUND) + find_package(Vulkan REQUIRED) + message("It's shading time") + add_library(runtime_shady STATIC shady_platform.cpp shady_platform.h) + target_link_libraries(runtime_shady PRIVATE shady::runtime) + list(APPEND RUNTIME_PLATFORMS runtime_shady) +endif() +set(AnyDSL_runtime_HAS_SHADY_SUPPORT ${shady_FOUND} CACHE INTERNAL "enables Shady support") + # look for LLVM for nvptx and gcn find_package(LLVM) if(LLVM_FOUND) diff --git a/src/anydsl_runtime.cpp b/src/anydsl_runtime.cpp index 42b392c1..3f02ee3a 100644 --- a/src/anydsl_runtime.cpp +++ b/src/anydsl_runtime.cpp @@ -35,6 +35,7 @@ struct RuntimeSingleton { register_cuda_platform(&runtime); register_opencl_platform(&runtime); register_hsa_platform(&runtime); + register_shady_platform(&runtime); } static std::pair detect_profile_level() { diff --git a/src/anydsl_runtime_config.h.in b/src/anydsl_runtime_config.h.in index 7cb1afa8..8f0590eb 100644 --- a/src/anydsl_runtime_config.h.in +++ b/src/anydsl_runtime_config.h.in @@ -9,6 +9,7 @@ #cmakedefine AnyDSL_runtime_HAS_CUDA_SUPPORT #cmakedefine AnyDSL_runtime_HAS_OPENCL_SUPPORT #cmakedefine AnyDSL_runtime_HAS_HSA_SUPPORT +#cmakedefine AnyDSL_runtime_HAS_SHADY_SUPPORT #cmakedefine AnyDSL_runtime_HAS_TBB_SUPPORT diff --git a/src/platform.h b/src/platform.h index c647a293..34fc2beb 100644 --- a/src/platform.h +++ b/src/platform.h @@ -12,6 +12,7 @@ void register_cpu_platform(Runtime*); void register_cuda_platform(Runtime*); void register_opencl_platform(Runtime*); void register_hsa_platform(Runtime*); +void register_shady_platform(Runtime*); /// A runtime platform. Exposes a set of devices, a copy function, /// and functions to allocate and release memory. diff --git a/src/runtime.cpp b/src/runtime.cpp index a13cec2f..49937f03 100644 --- a/src/runtime.cpp +++ b/src/runtime.cpp @@ -17,6 +17,9 @@ void register_opencl_platform(Runtime* runtime) { runtime->register_platformregister_platform("HSA"); } #endif +#ifndef AnyDSL_runtime_HAS_SHADY_SUPPORT +void register_shady_platform(Runtime* runtime) { runtime->register_platform("Shady"); } +#endif Runtime::Runtime(std::pair profile) : profile_(profile) diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp new file mode 100644 index 00000000..378e7f04 --- /dev/null +++ b/src/shady_platform.cpp @@ -0,0 +1,27 @@ +#include "shady_platform.h" + +ShadyPlatform::ShadyPlatform(Runtime *runtime) : Platform(runtime) { + info("hi"); +} + +ShadyPlatform::~ShadyPlatform() { + +} + +void * ShadyPlatform::alloc(DeviceId dev, int64_t size) {} +void * ShadyPlatform::alloc_host(DeviceId dev, int64_t size) {} +void * ShadyPlatform::alloc_unified(DeviceId dev, int64_t size) {} +void * ShadyPlatform::get_device_ptr(DeviceId dev, void *ptr) {} +void ShadyPlatform::release(DeviceId dev, void *ptr) {} +void ShadyPlatform::release_host(DeviceId dev, void *ptr) {} + +void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) {} +void ShadyPlatform::synchronize(DeviceId dev) {} + +void ShadyPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) {} +void ShadyPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) {} +void ShadyPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) {} + +void register_shady_platform(Runtime* runtime) { + runtime->register_platform(); +} \ No newline at end of file diff --git a/src/shady_platform.h b/src/shady_platform.h new file mode 100644 index 00000000..733dd4ab --- /dev/null +++ b/src/shady_platform.h @@ -0,0 +1,31 @@ +#ifndef ANYDSL_RUNTIME_RUNTIME_SHADY_H +#define ANYDSL_RUNTIME_RUNTIME_SHADY_H + +#include "platform.h" + +class ShadyPlatform : public Platform { +public: + ShadyPlatform(Runtime* runtime); + ~ShadyPlatform() override; + + void* alloc(DeviceId dev, int64_t size) override; + void* alloc_host(DeviceId dev, int64_t size) override; + void* alloc_unified(DeviceId dev, int64_t size) override; + void* get_device_ptr(DeviceId dev, void* ptr) override; + void release(DeviceId dev, void* ptr) override; + void release_host(DeviceId dev, void* ptr) override; + + void launch_kernel(DeviceId dev, const LaunchParams& launch_params) override; + void synchronize(DeviceId dev) override; + + void copy(DeviceId dev_src, const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) override; + void copy_from_host(const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) override; + void copy_to_host(DeviceId dev_src, const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size) override; + + std::string name() const override { return "shady"; } + size_t dev_count() const override { return 1; } + const char * device_name(DeviceId dev) const override { return "TODO"; } + bool device_check_feature_support(DeviceId dev, const char* feature) const override { return false; } +}; + +#endif //ANYDSL_RUNTIME_RUNTIME_SHADY_H From 3f4d66d66b86db4cfa757d4d2fc2698a6de49e0e Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 2 Dec 2022 15:03:15 +0100 Subject: [PATCH 29/43] can run a trivial program --- src/CMakeLists.txt | 2 +- src/shady_platform.cpp | 26 +++++++++++++++++++------- src/shady_platform.h | 9 +++++++++ 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a3606f8..30f8276c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -134,7 +134,7 @@ if (shady_FOUND) find_package(Vulkan REQUIRED) message("It's shading time") add_library(runtime_shady STATIC shady_platform.cpp shady_platform.h) - target_link_libraries(runtime_shady PRIVATE shady::runtime) + target_link_libraries(runtime_shady PUBLIC shady::runtime shady::api) list(APPEND RUNTIME_PLATFORMS runtime_shady) endif() set(AnyDSL_runtime_HAS_SHADY_SUPPORT ${shady_FOUND} CACHE INTERNAL "enables Shady support") diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp index 378e7f04..b5f70e78 100644 --- a/src/shady_platform.cpp +++ b/src/shady_platform.cpp @@ -1,21 +1,33 @@ #include "shady_platform.h" -ShadyPlatform::ShadyPlatform(Runtime *runtime) : Platform(runtime) { - info("hi"); +ShadyPlatform::ShadyPlatform(Runtime *r) : Platform(r) { + shady::RuntimeConfig cfg; + cfg.dump_spv = true; + cfg.use_validation = true; + shd_rt = shady::initialize_runtime(cfg); } ShadyPlatform::~ShadyPlatform() { + shady::shutdown_runtime(shd_rt); +} +void* ShadyPlatform::alloc(DeviceId dev, int64_t size) { + auto device = shady::get_device(shd_rt, dev); + shady::Buffer* buf = shady::allocate_buffer_device(device, size); } -void * ShadyPlatform::alloc(DeviceId dev, int64_t size) {} -void * ShadyPlatform::alloc_host(DeviceId dev, int64_t size) {} -void * ShadyPlatform::alloc_unified(DeviceId dev, int64_t size) {} -void * ShadyPlatform::get_device_ptr(DeviceId dev, void *ptr) {} +void* ShadyPlatform::alloc_host(DeviceId dev, int64_t size) {} +void* ShadyPlatform::alloc_unified(DeviceId dev, int64_t size) {} +void* ShadyPlatform::get_device_ptr(DeviceId dev, void *ptr) {} void ShadyPlatform::release(DeviceId dev, void *ptr) {} void ShadyPlatform::release_host(DeviceId dev, void *ptr) {} -void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) {} +void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { + std::string program_src = runtime_->load_file(launch_params.file_name); + shady::Program* program = shady::load_program(shd_rt, program_src.c_str()); + auto device = shady::get_device(shd_rt, dev); + shady::launch_kernel(program, device, launch_params.grid[0], launch_params.grid[1], launch_params.grid[2], 0, nullptr); +} void ShadyPlatform::synchronize(DeviceId dev) {} void ShadyPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) {} diff --git a/src/shady_platform.h b/src/shady_platform.h index 733dd4ab..f9d2fa91 100644 --- a/src/shady_platform.h +++ b/src/shady_platform.h @@ -3,6 +3,12 @@ #include "platform.h" +namespace shady { +extern "C" { +#include "shady/runtime.h" +} +} + class ShadyPlatform : public Platform { public: ShadyPlatform(Runtime* runtime); @@ -26,6 +32,9 @@ class ShadyPlatform : public Platform { size_t dev_count() const override { return 1; } const char * device_name(DeviceId dev) const override { return "TODO"; } bool device_check_feature_support(DeviceId dev, const char* feature) const override { return false; } + +private: + shady::Runtime* shd_rt; }; #endif //ANYDSL_RUNTIME_RUNTIME_SHADY_H From 804caa394c0d412e9aafff6082d4c2c13d8e8cfd Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 2 Dec 2022 15:34:01 +0100 Subject: [PATCH 30/43] corrected grid size --- src/shady_platform.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp index b5f70e78..37761523 100644 --- a/src/shady_platform.cpp +++ b/src/shady_platform.cpp @@ -26,7 +26,9 @@ void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_param std::string program_src = runtime_->load_file(launch_params.file_name); shady::Program* program = shady::load_program(shd_rt, program_src.c_str()); auto device = shady::get_device(shd_rt, dev); - shady::launch_kernel(program, device, launch_params.grid[0], launch_params.grid[1], launch_params.grid[2], 0, nullptr); + shady::Dispatch* d = shady::launch_kernel(program, device, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2], 0, nullptr); + assert(d); + shady::wait_completion(d); } void ShadyPlatform::synchronize(DeviceId dev) {} From 0fa2d561cf707947539a32659256cd9dc70fe986 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 8 Oct 2025 18:13:31 +0200 Subject: [PATCH 31/43] implement missing methods in VulkanPlatform --- src/vulkan_platform.cpp | 13 +++++++------ src/vulkan_platform.h | 4 ++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 584384a2..81b08454 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -97,12 +97,9 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .pNext = nullptr, .minImportedHostPointerAlignment = 0xDEADBEEF, }; - auto device_properties2 = VkPhysicalDeviceProperties2 { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - .pNext = &external_memory_host_properties, - }; - vkGetPhysicalDeviceProperties2(physical_device, &device_properties2); - auto& device_properties = device_properties2.properties; + + vkGetPhysicalDeviceProperties2(physical_device, &properties); + auto& device_properties = properties.properties; debug(" GPU%:", device_id); debug(" Device name: %", device_properties.deviceName); @@ -685,6 +682,10 @@ void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t off vkDestroyBuffer(device->device, tmp_buffer, nullptr); } +const char *VulkanPlatform::device_name(DeviceId dev) const { + return usable_devices[dev]->properties.properties.deviceName; +} + void register_vulkan_platform(Runtime* runtime) { runtime->register_platform(); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 7b8a7b90..a4e5f110 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -40,6 +40,9 @@ class VulkanPlatform : public Platform { size_t dev_count() const override { return usable_devices.size(); } std::string name() const override { return "Vulkan"; } + const char* device_name(DeviceId dev) const override; + bool device_check_feature_support(DeviceId, const char*) const override { return false; } + struct Device; struct Resource { @@ -86,6 +89,7 @@ class VulkanPlatform : public Platform { VulkanPlatform& platform; VkPhysicalDevice physical_device; + VkPhysicalDeviceProperties2 properties; size_t device_id; VkDevice device = nullptr; From 98ff22c9d8b5f0b5d786b5fb6f7a0f009ec19ccc Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 9 Oct 2025 10:03:44 +0200 Subject: [PATCH 32/43] cmake: add Vulkan platform files --- cmake/anydsl_runtime-config.cmake.in | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/anydsl_runtime-config.cmake.in b/cmake/anydsl_runtime-config.cmake.in index 97998ea0..6033e9a6 100644 --- a/cmake/anydsl_runtime-config.cmake.in +++ b/cmake/anydsl_runtime-config.cmake.in @@ -277,6 +277,7 @@ function(anydsl_runtime_wrap outfiles) ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_amdgpu.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_opencl.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_thorin.impala + ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_vulkan.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/runtime.impala ${_additional_platform_files}) From 1e09aff2288b0dbc107d4ed6852f44137f0d3f02 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 9 Oct 2025 10:03:54 +0200 Subject: [PATCH 33/43] wire up vulkan intrinsics based off SPIR-V --- platforms/artic/intrinsics_thorin.impala | 1 + platforms/artic/intrinsics_vulkan.impala | 58 +++++++++++------------- platforms/artic/runtime.impala | 2 + 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/platforms/artic/intrinsics_thorin.impala b/platforms/artic/intrinsics_thorin.impala index c91210a1..bc92aa7f 100644 --- a/platforms/artic/intrinsics_thorin.impala +++ b/platforms/artic/intrinsics_thorin.impala @@ -19,6 +19,7 @@ #[import(cc = "thorin")] fn amdgpu_hsa(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn levelzero(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); +#[import(cc = "thorin")] fn vulkan_cs(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T]; #[import(cc = "thorin")] fn hls(_dev: i32, _body: fn() -> ()) -> (); #[import(cc = "thorin", name = "pipeline")] fn thorin_pipeline(_initiation_interval: i32, _lower: i32, _upper: i32, _body: fn(i32) -> ()) -> (); // only for HLS/OpenCL backend diff --git a/platforms/artic/intrinsics_vulkan.impala b/platforms/artic/intrinsics_vulkan.impala index 5cb339a3..6f742cde 100644 --- a/platforms/artic/intrinsics_vulkan.impala +++ b/platforms/artic/intrinsics_vulkan.impala @@ -58,42 +58,38 @@ #[import(cc = "device", name = "atomic_add")] fn vulkan_atomic_add_shared(&mut addrspace(3)i32, i32) -> i32; #[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_global(&mut addrspace(1)i32, i32) -> i32; #[import(cc = "device", name = "atomic_min")] fn vulkan_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32; -#[import(cc = "device", name = "get_work_dim")] fn vulkan_get_work_dim() -> u32; -//#[import(cc = "device", name = "get_global_size")] fn vulkan_get_global_size(u32) -> i32; -fn @vulkan_get_global_size(dim: u32) -> i32 = vulkan_get_local_size(dim) * vulkan_get_num_groups(dim); -#[import(cc = "device", name = "get_global_id")] fn vulkan_get_global_id(u32) -> i32; -#[import(cc = "device", name = "get_local_size")] fn vulkan_get_local_size(u32) -> i32; -#[import(cc = "device", name = "get_local_id")] fn vulkan_get_local_id(u32) -> i32; -#[import(cc = "device", name = "get_num_groups")] fn vulkan_get_num_groups(u32) -> i32; -#[import(cc = "device", name = "get_group_id")] fn vulkan_get_group_id(u32) -> i32; -//#[import(cc = "device", name = "get_global_offset")] fn vulkan_get_global_offset(u32) -> i32; -fn @vulkan_get_global_offset(dim: u32) -> i32 = vulkan_get_group_id(dim) * vulkan_get_local_size(dim); -#[import(cc = "device", name = "spirv.nonsemantic.printf")] fn shader_printf[T](T) -> (); +fn spv_vk_get_num_groups() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](24 /* BuiltInNumWorkgroups */); +fn spv_vk_get_local_size() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](25 /* BuiltInWorkgroupSize */); +fn spv_vk_get_group_id() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](26 /* BuiltInWorkgroupId */); +fn spv_vk_get_local_id() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](27 /* BuiltInLocalInvocationId */); +fn spv_vk_get_global_id() = *spirv_get_builtin[&mut addrspace(8) simd[u32 * 3]](28 /* BuiltInGlobalInvocationId */); + +fn @vulkan_get_global_size(dim: u32) -> i32 = (spv_vk_get_local_size()(dim) * spv_vk_get_num_groups()(dim)) as i32; fn @vulkan_accelerator(dev: i32) = Accelerator { exec = @|body| |grid, block| { let work_item = WorkItem { - tidx = @|| vulkan_get_local_id(0), - tidy = @|| vulkan_get_local_id(1), - tidz = @|| vulkan_get_local_id(2), - bidx = @|| vulkan_get_group_id(0), - bidy = @|| vulkan_get_group_id(1), - bidz = @|| vulkan_get_group_id(2), - gidx = @|| vulkan_get_global_id(0), - gidy = @|| vulkan_get_global_id(1), - gidz = @|| vulkan_get_global_id(2), - bdimx = @|| vulkan_get_local_size(0), - bdimy = @|| vulkan_get_local_size(1), - bdimz = @|| vulkan_get_local_size(2), - gdimx = @|| vulkan_get_global_size(0), - gdimy = @|| vulkan_get_global_size(1), - gdimz = @|| vulkan_get_global_size(2), - nblkx = @|| vulkan_get_num_groups(0), - nblky = @|| vulkan_get_num_groups(1), - nblkz = @|| vulkan_get_num_groups(2) + tidx = @|| spv_vk_get_local_id()(0) as i32, + tidy = @|| spv_vk_get_local_id()(1) as i32, + tidz = @|| spv_vk_get_local_id()(2) as i32, + bidx = @|| spv_vk_get_group_id()(0) as i32, + bidy = @|| spv_vk_get_group_id()(1) as i32, + bidz = @|| spv_vk_get_group_id()(2) as i32, + gidx = @|| spv_vk_get_global_id()(0) as i32, + gidy = @|| spv_vk_get_global_id()(1) as i32, + gidz = @|| spv_vk_get_global_id()(2) as i32, + bdimx = @|| spv_vk_get_local_size()(0) as i32, + bdimy = @|| spv_vk_get_local_size()(1) as i32, + bdimz = @|| spv_vk_get_local_size()(2) as i32, + gdimx = @|| vulkan_get_global_size(0) as i32, + gdimy = @|| vulkan_get_global_size(1) as i32, + gdimz = @|| vulkan_get_global_size(2) as i32, + nblkx = @|| spv_vk_get_num_groups()(0) as i32, + nblky = @|| spv_vk_get_num_groups()(1) as i32, + nblkz = @|| spv_vk_get_num_groups()(2) as i32 }; - spirv(dev, grid, block, || @body(work_item)) + vulkan_cs(dev, grid, block, || @body(work_item)) }, sync = @|| synchronize_vulkan(dev), alloc = @|size| alloc_vulkan(dev, size), @@ -101,7 +97,7 @@ fn @vulkan_accelerator(dev: i32) = Accelerator { barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE), }; -static vk_intrinsics = Intrinsics { +static vulkan_intrinsics = Intrinsics { expf = vulkan_expf, exp2f = vulkan_exp2f, logf = vulkan_logf, diff --git a/platforms/artic/runtime.impala b/platforms/artic/runtime.impala index 456a9412..cedd5b30 100644 --- a/platforms/artic/runtime.impala +++ b/platforms/artic/runtime.impala @@ -123,6 +123,8 @@ fn @alloc_levelzero(dev: i32, size: i64) = alloc(runtime_device(5, dev), size); fn @alloc_levelzero_host(dev: i32, size: i64) = alloc_host(runtime_device(5, dev), size); fn @alloc_levelzero_unified(dev: i32, size: i64) = alloc_unified(runtime_device(5, dev), size); fn @synchronize_levelzero(dev: i32) = runtime_synchronize(runtime_device(5, dev)); +fn @synchronize_vulkan(dev: i32) = runtime_synchronize(runtime_device(6, dev)); +fn @alloc_vulkan(dev: i32, size: i64) = alloc(runtime_device(6, dev), size); fn @copy(src: Buffer, dst: Buffer) = runtime_copy(src.device, src.data, 0, dst.device, dst.data, 0, src.size); fn @copy_offset(src: Buffer, off_src: i64, dst: Buffer, off_dst: i64, size: i64) = runtime_copy(src.device, src.data, off_src, dst.device, dst.data, off_dst, size); From d19428c1092f3086f35ada0069ff2e3b02a56aee Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 9 Oct 2025 16:50:05 +0200 Subject: [PATCH 34/43] updated shady API and implement more stuff --- src/shady_platform.cpp | 120 +++++++++++++++++++++++++++++++++++------ src/shady_platform.h | 14 ++++- 2 files changed, 115 insertions(+), 19 deletions(-) diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp index 37761523..7afe060d 100644 --- a/src/shady_platform.cpp +++ b/src/shady_platform.cpp @@ -1,40 +1,126 @@ #include "shady_platform.h" + +using namespace shady; + +struct ShadyBuffer { + ShadyPlatform::ShadyDevice& device_; + shady::Buffer* handle_; + size_t size_; + + ShadyBuffer(ShadyPlatform::ShadyDevice& device, size_t size); +}; + +struct ShadyProgram { + ShadyPlatform::ShadyDevice& device_; + shady::Module* module_; + shady::Program* handle_; + + ShadyProgram(ShadyPlatform::ShadyDevice&, std::string); +}; + +struct ShadyPlatform::ShadyDevice { + ShadyPlatform& platform_; + DeviceId id_; + shady::Device* handle_; + shady::TargetConfig target_config_; + + std::unordered_map> buffers_; + std::unordered_map> programs_; + + ShadyDevice(ShadyPlatform& platform, DeviceId id) : platform_(platform), id_(id) { + handle_ = shd_rn_get_device(platform.runner_, id); + target_config_ = shd_rn_get_device_target_config(&platform_.compiler_config_, handle_); + } + + ShadyProgram& load_program(std::string filename); +}; + +ShadyBuffer::ShadyBuffer(ShadyPlatform::ShadyDevice& device, size_t size) : device_(device), size_(size) { + handle_ = shady::shd_rn_allocate_buffer_device(device_.handle_, size); +} + ShadyPlatform::ShadyPlatform(Runtime *r) : Platform(r) { - shady::RuntimeConfig cfg; + shady::RunnerConfig cfg; cfg.dump_spv = true; cfg.use_validation = true; - shd_rt = shady::initialize_runtime(cfg); + + runner_ = shady::shd_rn_initialize(cfg); + for (size_t i = 0; i < shd_rn_device_count(runner_); i++) { + devices_.emplace_back(std::make_unique(*this, (DeviceId) i)); + } } ShadyPlatform::~ShadyPlatform() { - shady::shutdown_runtime(shd_rt); + shd_rn_shutdown(runner_); } void* ShadyPlatform::alloc(DeviceId dev, int64_t size) { - auto device = shady::get_device(shd_rt, dev); - shady::Buffer* buf = shady::allocate_buffer_device(device, size); + auto& device = devices_[dev]; + auto buffer = std::make_unique(*device, (size_t) size); + uint64_t device_address = shd_rn_get_buffer_device_pointer(buffer->handle_); + device->buffers_[device_address] = std::move(buffer); + return reinterpret_cast(device_address); +} + +void* ShadyPlatform::alloc_host(DeviceId dev, int64_t size) { + assert(false); +} + +void* ShadyPlatform::alloc_unified(DeviceId dev, int64_t size) { + assert(false); +} + +void* ShadyPlatform::get_device_ptr(DeviceId dev, void *ptr) { + assert(false); +} + +void ShadyPlatform::release(DeviceId dev, void *ptr) { + auto& device = devices_[dev]; + device->buffers_.erase((uint64_t) ptr); +} + +void ShadyPlatform::release_host(DeviceId dev, void *ptr) { + assert(false); +} + +ShadyProgram& ShadyPlatform::ShadyDevice::load_program(std::string filename) { + if (auto found = programs_.find(filename); found != programs_.end()) + return *found->second; + return *(programs_[filename] = std::make_unique(*this, filename)); } -void* ShadyPlatform::alloc_host(DeviceId dev, int64_t size) {} -void* ShadyPlatform::alloc_unified(DeviceId dev, int64_t size) {} -void* ShadyPlatform::get_device_ptr(DeviceId dev, void *ptr) {} -void ShadyPlatform::release(DeviceId dev, void *ptr) {} -void ShadyPlatform::release_host(DeviceId dev, void *ptr) {} +ShadyProgram::ShadyProgram(ShadyPlatform::ShadyDevice& device, std::string file_name) : device_(device) { + std::string program_src = device_.platform_.runtime_->load_file(file_name); + shd_driver_load_source_file(&device_.platform_.compiler_config_, &device_.target_config_, SrcSPIRV, program_src.size(), program_src.c_str(), "test", &module_); + handle_ = shd_rn_new_program_from_module(device_.platform_.runner_, &device_.platform_.compiler_config_, module_); +} void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { - std::string program_src = runtime_->load_file(launch_params.file_name); - shady::Program* program = shady::load_program(shd_rt, program_src.c_str()); - auto device = shady::get_device(shd_rt, dev); - shady::Dispatch* d = shady::launch_kernel(program, device, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2], 0, nullptr); + auto& device = devices_[dev]; + auto& program = device->load_program(launch_params.file_name); + + std::vector args; + + shady::Command* d = shady::shd_rn_launch_kernel(program.handle_, device->handle_, launch_params.kernel_name, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2], args.size(), args.data(), nullptr); assert(d); - shady::wait_completion(d); + shady::shd_rn_wait_completion(d); } void ShadyPlatform::synchronize(DeviceId dev) {} void ShadyPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) {} -void ShadyPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) {} -void ShadyPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) {} + +void ShadyPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { + auto& dst_device = devices_[dev_dst]; + auto& dst_buffer = dst_device->buffers_[(uint64_t) dst]; + shd_rn_copy_to_buffer(dst_buffer->handle_, offset_dst, (char*) src + offset_src, size); +} + +void ShadyPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { + auto& src_device = devices_[dev_src]; + auto& src_buffer = src_device->buffers_[(uint64_t) src]; + shd_rn_copy_from_buffer(src_buffer->handle_, offset_dst, (char*) dst + offset_dst, size); +} void register_shady_platform(Runtime* runtime) { runtime->register_platform(); diff --git a/src/shady_platform.h b/src/shady_platform.h index f9d2fa91..b4271253 100644 --- a/src/shady_platform.h +++ b/src/shady_platform.h @@ -5,10 +5,13 @@ namespace shady { extern "C" { -#include "shady/runtime.h" +#include "shady/runner/runner.h" +#include "shady/driver.h" } } +struct ShadyProgram; + class ShadyPlatform : public Platform { public: ShadyPlatform(Runtime* runtime); @@ -33,8 +36,15 @@ class ShadyPlatform : public Platform { const char * device_name(DeviceId dev) const override { return "TODO"; } bool device_check_feature_support(DeviceId dev, const char* feature) const override { return false; } + struct ShadyDevice; private: - shady::Runtime* shd_rt; + shady::CompilerConfig compiler_config_ = shady::shd_default_compiler_config(); + shady::Runner* runner_; + + std::vector> devices_; + + friend ShadyDevice; + friend ShadyProgram; }; #endif //ANYDSL_RUNTIME_RUNTIME_SHADY_H From c00bfc7991dcd964d04e398ffe04c2e76cdbe5aa Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Thu, 9 Oct 2025 21:49:32 +0200 Subject: [PATCH 35/43] shady: minor fixes --- src/shady_platform.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp index 7afe060d..05e28389 100644 --- a/src/shady_platform.cpp +++ b/src/shady_platform.cpp @@ -45,6 +45,8 @@ ShadyPlatform::ShadyPlatform(Runtime *r) : Platform(r) { cfg.dump_spv = true; cfg.use_validation = true; + compiler_config_.dynamic_scheduling = false; + runner_ = shady::shd_rn_initialize(cfg); for (size_t i = 0; i < shd_rn_device_count(runner_); i++) { devices_.emplace_back(std::make_unique(*this, (DeviceId) i)); @@ -101,14 +103,21 @@ void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_param auto& program = device->load_program(launch_params.file_name); std::vector args; + for (uint32_t argIdx = 0; argIdx < launch_params.num_args; ++argIdx) { + args.push_back(launch_params.args.data[argIdx]); + //WRAP_LEVEL_ZERO(zeKernelSetArgumentValue(hKernel, argIdx, launch_params.args.sizes[argIdx], launch_params.args.data[argIdx])); + } shady::Command* d = shady::shd_rn_launch_kernel(program.handle_, device->handle_, launch_params.kernel_name, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2], args.size(), args.data(), nullptr); assert(d); shady::shd_rn_wait_completion(d); } + void ShadyPlatform::synchronize(DeviceId dev) {} -void ShadyPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) {} +void ShadyPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { + assert(false); +} void ShadyPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { auto& dst_device = devices_[dev_dst]; @@ -119,7 +128,7 @@ void ShadyPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId void ShadyPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { auto& src_device = devices_[dev_src]; auto& src_buffer = src_device->buffers_[(uint64_t) src]; - shd_rn_copy_from_buffer(src_buffer->handle_, offset_dst, (char*) dst + offset_dst, size); + shd_rn_copy_from_buffer(src_buffer->handle_, offset_src, (char*) dst + offset_dst, size); } void register_shady_platform(Runtime* runtime) { From e33fb40df7687499f0d60dcbc3d4131746778752 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 17 Oct 2025 17:15:13 +0200 Subject: [PATCH 36/43] modernizing the old Vulkan runtime --- src/CMakeLists.txt | 1 + src/shady_platform.cpp | 1 - src/vulkan_platform.cpp | 394 ++++++++++++++++------------------------ src/vulkan_platform.h | 91 ++++++---- 4 files changed, 214 insertions(+), 273 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b1fbb7fa..2c7d90ce 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -152,6 +152,7 @@ set(AnyDSL_runtime_HAS_SHADY_SUPPORT ${shady_FOUND} CACHE INTERNAL "enables Shad find_package(Vulkan) if(Vulkan_FOUND) + find_package(shady REQUIRED) add_library(runtime_vulkan STATIC vulkan_platform.cpp vulkan_platform.h) target_include_directories(runtime_vulkan PRIVATE ${Vulkan_INCLUDE_DIRS}) target_link_libraries(runtime_vulkan PRIVATE runtime_base ${Vulkan_LIBRARIES}) diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp index 05e28389..de29824b 100644 --- a/src/shady_platform.cpp +++ b/src/shady_platform.cpp @@ -1,6 +1,5 @@ #include "shady_platform.h" - using namespace shady; struct ShadyBuffer { diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 81b08454..2c3b1a7c 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -8,6 +8,13 @@ const auto khr_validation = "VK_LAYER_KHRONOS_validation"; error("error, failed %", #stuff); \ } +template +void insert_pnext(T& base, U& append) { + assert(base.pNext == nullptr); + append.pNext = base.pNext; + base.pNext = &append; +} + inline std::vector query_layers_available() { uint32_t count; vkEnumerateInstanceLayerProperties(&count, nullptr); @@ -57,6 +64,7 @@ VulkanPlatform::VulkanPlatform(Runtime* runtime) : Platform(runtime) { validation_done: auto app_info = VkApplicationInfo { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pApplicationName = "AnyDSL Runtime", .apiVersion = VK_API_VERSION_1_2, }; @@ -91,25 +99,7 @@ VulkanPlatform::~VulkanPlatform() { } VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id) - : platform(platform), physical_device(physical_device), device_id(device_id) { - auto external_memory_host_properties = VkPhysicalDeviceExternalMemoryHostPropertiesEXT { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, - .pNext = nullptr, - .minImportedHostPointerAlignment = 0xDEADBEEF, - }; - - vkGetPhysicalDeviceProperties2(physical_device, &properties); - auto& device_properties = properties.properties; - - debug(" GPU%:", device_id); - debug(" Device name: %", device_properties.deviceName); - debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(device_properties.apiVersion), VK_VERSION_MINOR(device_properties.apiVersion), VK_VERSION_PATCH(device_properties.apiVersion)); - - min_imported_host_ptr_alignment = external_memory_host_properties.minImportedHostPointerAlignment; - debug(" Min imported host ptr alignment: %", min_imported_host_ptr_alignment); - if (min_imported_host_ptr_alignment == 0xDEADBEEF) - error("Device does not report minimum host pointer alignment"); - +: platform(platform), physical_device(physical_device), device_id(device_id) { uint32_t exts_count; vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); std::vector available_device_extensions(exts_count); @@ -123,9 +113,23 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic // Use this to import host memory as GPU-visible memory, otherwise use a fallback path that copies when uploading/downloading if (is_ext_available(available_device_extensions, "VK_EXT_external_memory_host")) { enabled_device_extensions.push_back("VK_EXT_external_memory_host"); + insert_pnext(properties, external_memory_host_properties); can_import_host_memory = true; } + vkGetPhysicalDeviceProperties2(physical_device, &properties); + auto& device_properties = properties.properties; + + debug(" GPU%:", device_id); + debug(" Device name: %", device_properties.deviceName); + debug(" Vulkan version %.%.%", VK_VERSION_MAJOR(device_properties.apiVersion), VK_VERSION_MINOR(device_properties.apiVersion), VK_VERSION_PATCH(device_properties.apiVersion)); + + if (can_import_host_memory) { + debug(" Min imported host ptr alignment: %", external_memory_host_properties.minImportedHostPointerAlignment); + if (external_memory_host_properties.minImportedHostPointerAlignment == 0xFFFFFFFF) + error("Device does not report minimum host pointer alignment"); + } + uint32_t queue_families_count; vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_families_count, nullptr); std::vector queue_families(queue_families_count); @@ -193,8 +197,8 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .ppEnabledExtensionNames = enabled_device_extensions.data(), .pEnabledFeatures = nullptr // controlled via VkPhysicalDeviceFeatures2 }; - CHECK(vkCreateDevice(physical_device, &device_create_info, nullptr, &device)); - vkGetDeviceQueue(device, compute_queue_family, 0, &queue); + CHECK(vkCreateDevice(physical_device, &device_create_info, nullptr, &handle_)); + vkGetDeviceQueue(handle_, compute_queue_family, 0, &queue); auto cmd_pool_create_info = VkCommandPoolCreateInfo { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, @@ -202,67 +206,40 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = (uint32_t) compute_queue_family, }; - CHECK(vkCreateCommandPool(device, &cmd_pool_create_info, nullptr, &cmd_pool)); + CHECK(vkCreateCommandPool(handle_, &cmd_pool_create_info, nullptr, &cmd_pool)); // Load function pointers -#define f(s) extension_fns.s = (PFN_##s) vkGetDeviceProcAddr(device, #s); +#define f(s) extension_fns.s = (PFN_##s) vkGetDeviceProcAddr(handle_, #s); DevicesExtensionsFunctions(f) #undef f } VulkanPlatform::Device::~Device() { - vkDestroyCommandPool(device, cmd_pool, nullptr); + vkDestroyCommandPool(handle_, cmd_pool, nullptr); kernels.clear(); - if (!resources.empty()) { - info("Some vulkan resources were not released. Releasing those automatically..."); - resources.clear(); - } - if (device != nullptr) - vkDestroyDevice(device, nullptr); + //if (!resources.empty()) { + // info("Some vulkan resources were not released. Releasing those automatically..."); + // resources.clear(); + //} + vkDestroyDevice(handle_, nullptr); } -uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits, VulkanPlatform::Device::AllocHeap heap) { +uint32_t VulkanPlatform::Device::find_suitable_memory_type(uint32_t memory_type_bits, VkMemoryPropertyFlags memory_flags, VkMemoryHeapFlags heap_flags) { VkPhysicalDeviceMemoryProperties device_memory_properties; vkGetPhysicalDeviceMemoryProperties(physical_device, &device_memory_properties); for (size_t bit = 0; bit < 32; bit++) { auto& memory_type = device_memory_properties.memoryTypes[bit]; auto& memory_heap = device_memory_properties.memoryHeaps[memory_type.heapIndex]; - bool is_host_visible = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; - bool is_host_coherent = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; - bool is_device_local = (memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; - if ((memory_type_bits & (1 << bit)) != 0) { - switch (heap) { - case AllocHeap::DEVICE_LOCAL: - if (is_device_local) return bit; - break; - case AllocHeap::HOST_VISIBLE: - if (is_host_visible && is_host_coherent) return bit; - break; - } + if ((memory_type.propertyFlags & memory_flags) == memory_flags && (memory_heap.flags & heap_flags) == heap_flags) + return bit; } } assert(false && "Unable to find a suitable memory type"); } -std::pair VulkanPlatform::Device::allocate_buffer(int64_t size, VkBufferUsageFlags usage_flags, AllocHeap heap) { - auto buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = usage_flags, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - VkBuffer buffer; - vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer); - - VkMemoryRequirements memory_requirements; - vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); - +VkDeviceMemory VulkanPlatform::Device::allocate_memory(VkDeviceSize size, uint32_t memory_type_bits, VkMemoryPropertyFlags memory_flags, VkMemoryHeapFlags heap_flags) { auto allocate_flags = VkMemoryAllocateFlagsInfo { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, .pNext = nullptr, @@ -273,127 +250,115 @@ std::pair VulkanPlatform::Device::allocate_buffer(int6 auto allocation_info = VkMemoryAllocateInfo { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = &allocate_flags, - .allocationSize = (VkDeviceSize) memory_requirements.size, // the driver might want padding ! - .memoryTypeIndex = find_suitable_memory_type(memory_requirements.memoryTypeBits, heap), + .allocationSize = size, // the driver might want padding ! + .memoryTypeIndex = find_suitable_memory_type(memory_type_bits, memory_flags, heap_flags), }; VkDeviceMemory memory; - vkAllocateMemory(device, &allocation_info, nullptr, &memory); - vkBindBufferMemory(device, buffer, memory, 0); + vkAllocateMemory(handle_, &allocation_info, nullptr, &memory); - return std::make_pair(buffer, memory); + return memory; } -VulkanPlatform::Buffer* VulkanPlatform::Device::create_buffer_resource(int64_t size, VkBufferUsageFlags usage_flags, AllocHeap heap) { - auto [buffer, memory] = allocate_buffer(size, usage_flags, heap); - - size_t id = next_resource_id++; - - std::unique_ptr res_buffer = std::make_unique(*this); - res_buffer->alloc = memory; - res_buffer->id = id; - res_buffer->buffer = buffer; - - auto bda_info = VkBufferDeviceAddressInfoKHR { - .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, +VulkanPlatform::Buffer::Buffer(Device& device, size_t size, BackingStorage backing, VkBufferUsageFlags2 usage) : Resource(device) { + VkBufferCreateInfo buffer_create_info { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, - .buffer = res_buffer->buffer + .flags = 0, + .size = (VkDeviceSize) size, + .usage = static_cast(usage), + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, }; - VkDeviceAddress bda = extension_fns.vkGetBufferDeviceAddressKHR(device, &bda_info); - assert(bda != 0 && "vkGetBufferDeviceAddress failed"); - res_buffer->bda = bda; - resources.push_back(std::move(res_buffer)); + auto create_buffer = [&]() { vkCreateBuffer(device.handle_, &buffer_create_info, nullptr, &handle_); }; + + if (const auto* import_host = std::get_if(&backing)) { + VkExternalMemoryBufferCreateInfo external_mem_buffer_create_info { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .pNext = nullptr, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT + }; + insert_pnext(buffer_create_info, external_mem_buffer_create_info); + create_buffer(); + + VkDeviceMemory imported_memory; + size_t imported_offset; + std::tie(imported_memory, imported_offset) = device.import_host_memory(import_host->host_memory_, size); + vkBindBufferMemory(device.handle_, handle_, imported_memory, imported_offset); + } else if (std::get_if(&backing)) { + create_buffer(); + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(device.handle_, handle_, &memory_requirements); + device_memory_ = device.allocate_memory(memory_requirements.size, memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + vkBindBufferMemory(device.handle_, handle_, device_memory_, 0); + } else if (std::get_if(&backing)) { + create_buffer(); + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(device.handle_, handle_, &memory_requirements); + device_memory_ = device.allocate_memory(memory_requirements.size, memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + vkBindBufferMemory(device.handle_, handle_, device_memory_, 0); + } else { + abort(); + } - return reinterpret_cast(resources.back().get()); + if (usage & VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfoKHR bda_info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, + .pNext = nullptr, + .buffer = handle_ + }; + device_address_ = device.extension_fns.vkGetBufferDeviceAddressKHR(device.handle_, &bda_info); + assert(device_address_ != 0 && "vkGetBufferDeviceAddress failed"); + } } -constexpr VkBufferUsageFlags general_purpose_buffer_flags = - VK_BUFFER_USAGE_TRANSFER_SRC_BIT - | VK_BUFFER_USAGE_TRANSFER_DST_BIT - | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT - | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR - ; +uint64_t VulkanPlatform::Device::create_buffer_resource(size_t size, Buffer::BackingStorage backing, VkBufferUsageFlags usage) { + std::unique_ptr buffer = std::make_unique(*this, size, backing, usage); + + assert(buffer->device_address_); + auto& b = *(buffers_[buffer->device_address_] = std::move(buffer)); + + return b.device_address_; +} void* VulkanPlatform::alloc(DeviceId dev, int64_t size) { auto& device = usable_devices[dev]; - auto resource = device->create_buffer_resource(size, general_purpose_buffer_flags, VulkanPlatform::Device::AllocHeap::DEVICE_LOCAL); - return (void*) ((size_t) resource->bda); + return reinterpret_cast(device->create_buffer_resource(size, Buffer::DeviceMemory(), Buffer::ALL_BUFFER_USAGE)); } void* VulkanPlatform::alloc_host(DeviceId dev, int64_t size) { auto& device = usable_devices[dev]; - if (device->can_import_host_memory) - return malloc(size); - else { - auto id = device->create_buffer_resource(size, general_purpose_buffer_flags, VulkanPlatform::Device::AllocHeap::HOST_VISIBLE); - // TODO map it - assert(false); - } + return reinterpret_cast(device->create_buffer_resource(size, Buffer::HostMemory(), Buffer::ALL_BUFFER_USAGE)); } void* VulkanPlatform::get_device_ptr(DeviceId dev, void *ptr) { command_unavailable("get_device_ptr"); } -VulkanPlatform::Resource* VulkanPlatform::Device::find_resource_by_id(size_t id) { - size_t i = 0; - for (auto& resource : resources) { - if (resource->id == id) { - return resources[i].get(); - } - i++; - } - assert(false && "cannot find resource"); - return nullptr; -} - -VulkanPlatform::Buffer* VulkanPlatform::Device::find_buffer_by_device_address(uint64_t bda) { - size_t i = 0; - for (auto& resource : resources) { - if (auto buffer = dynamic_cast(resource.get()); buffer->bda == bda) { - return buffer; - } - i++; - } - assert(false && "cannot find resource"); - return nullptr; -} - -VulkanPlatform::Buffer* VulkanPlatform::Device::find_buffer_by_host_address(size_t host_address) { - size_t i = 0; - for (auto& resource : resources) { - if (auto buffer = dynamic_cast(resource.get()); buffer->mapped_host_address == host_address) { - return buffer; - } - i++; - } - assert(false && "cannot find resource"); - return nullptr; -} - void VulkanPlatform::release(DeviceId dev, void *ptr) { if (ptr == nullptr) return; auto& device = usable_devices[dev]; - auto bda = reinterpret_cast(ptr); - size_t i = 0; - for (auto& resource : device->resources) { - if (auto buffer = dynamic_cast(resource.get()); buffer->bda == bda) { - device->resources.erase(device->resources.begin() + i); - return; - } - i++; + auto found = device->buffers_.find(reinterpret_cast(ptr)); + + if (found != device->buffers_.end()) { + device->buffers_.erase(found); + return; } + assert(false && "Could not find such a buffer to release"); } void VulkanPlatform::release_host(DeviceId dev, void *ptr) { - auto& device = usable_devices[dev]; - if (device->can_import_host_memory) - free(ptr); - else - release(dev, ptr); + release(dev, ptr); +} + +VulkanPlatform::Buffer::~Buffer() { + if (device_memory_) + vkFreeMemory(device_.handle_, device_memory_, nullptr); + vkDestroyBuffer(device_.handle_, handle_, nullptr); } VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { @@ -410,7 +375,7 @@ VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& f .codeSize = bin.size(), .pCode = reinterpret_cast(bin.c_str()), }; - CHECK(vkCreateShaderModule(device, &shader_module_create_info, nullptr, &kernel->shader_module)); + CHECK(vkCreateShaderModule(handle_, &shader_module_create_info, nullptr, &kernel->shader_module)); auto stage = VkPipelineShaderStageCreateInfo { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -438,7 +403,7 @@ VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& f .pushConstantRangeCount = (uint32_t) push_constants.size(), .pPushConstantRanges = push_constants.data(), }; - CHECK(vkCreatePipelineLayout(device, &layout_create_info, nullptr, &kernel-> layout)); + CHECK(vkCreatePipelineLayout(handle_, &layout_create_info, nullptr, &kernel-> layout)); auto compute_pipeline_create_info = VkComputePipelineCreateInfo { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, @@ -449,7 +414,7 @@ VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& f .basePipelineHandle = VK_NULL_HANDLE, .basePipelineIndex = 0, }; - CHECK(vkCreateComputePipelines(device, nullptr, 1, &compute_pipeline_create_info, nullptr, &kernel->pipeline)); + CHECK(vkCreateComputePipelines(handle_, nullptr, 1, &compute_pipeline_create_info, nullptr, &kernel->pipeline)); return kernel; } @@ -464,21 +429,21 @@ void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_para vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, kernel->pipeline); std::array push_constants {}; size_t offset = 0; - for (uint32_t arg = 0; arg < launch_params.num_args; arg++) { - if (launch_params.args.types[arg] == KernelArgType::Val) { - assert(launch_params.args.sizes[arg] == 4 && "Preliminary support..."); - memcpy(push_constants.data() + offset, launch_params.args.data[arg], 4); - offset += 4; - } else if (launch_params.args.types[arg] == KernelArgType::Ptr) { - void* buffer = *(void**)launch_params.args.data[arg]; - auto dst_buffer_resource = (Buffer*) device->find_buffer_by_device_address((uint64_t) buffer); - uint64_t buffer_bda = dst_buffer_resource->bda; - memcpy(push_constants.data() + offset, &buffer_bda, 8); - offset += 8; - } else { - assert(false && "no struct support yet"); - } - } + //for (uint32_t arg = 0; arg < launch_params.num_args; arg++) { + // if (launch_params.args.types[arg] == KernelArgType::Val) { + // assert(launch_params.args.sizes[arg] == 4 && "Preliminary support..."); + // memcpy(push_constants.data() + offset, launch_params.args.data[arg], 4); + // offset += 4; + // } else if (launch_params.args.types[arg] == KernelArgType::Ptr) { + // void* buffer = *(void**)launch_params.args.data[arg]; + // auto dst_buffer_resource = (Buffer*) device->find_buffer_by_device_address((uint64_t) buffer); + // uint64_t buffer_bda = dst_buffer_resource->bda; + // memcpy(push_constants.data() + offset, &buffer_bda, 8); + // offset += 8; + // } else { + // assert(false && "no struct support yet"); + // } + //} vkCmdPushConstants(cmd_buf, kernel->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128, &push_constants); vkCmdDispatch(cmd_buf, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2]); }); @@ -493,13 +458,15 @@ VkExternalMemoryHandleTypeFlagBits imported_host_memory_handle_type = VK_EXTERNA std::pair VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { assert(can_import_host_memory && "This device does not support importing host memory"); + size_t alignment = external_memory_host_properties.minImportedHostPointerAlignment; + // Align stuff - size_t mask = ~(min_imported_host_ptr_alignment - 1); + size_t mask = ~(alignment - 1); size_t host_ptr = (size_t)ptr; size_t aligned_host_ptr = host_ptr & mask; size_t end = host_ptr + size; - size_t aligned_end = ((end + min_imported_host_ptr_alignment - 1) / min_imported_host_ptr_alignment) * min_imported_host_ptr_alignment; + size_t aligned_end = ((end + alignment - 1) / alignment) * alignment; size_t aligned_size = aligned_end - aligned_host_ptr; // where the memory we wanted to import will actually start @@ -509,8 +476,8 @@ std::pair VulkanPlatform::Device::import_host_memory(voi VkMemoryHostPointerPropertiesEXT host_ptr_properties { .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; - CHECK(extension_fns.vkGetMemoryHostPointerPropertiesEXT(device, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties)); - uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, AllocHeap::HOST_VISIBLE); + CHECK(extension_fns.vkGetMemoryHostPointerPropertiesEXT(handle_, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties)); + uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); // Import memory auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { @@ -526,35 +493,10 @@ std::pair VulkanPlatform::Device::import_host_memory(voi .memoryTypeIndex = memory_type }; VkDeviceMemory imported_memory; - CHECK(vkAllocateMemory(device, &allocation_info, nullptr, &imported_memory)); + CHECK(vkAllocateMemory(handle_, &allocation_info, nullptr, &imported_memory)); return std::make_pair(imported_memory, offset); } -std::pair VulkanPlatform::Device::import_host_memory_as_buffer(void* ptr, size_t size, VkBufferUsageFlags usage_flags) { - VkDeviceMemory imported_memory; - size_t imported_offset; - std::tie(imported_memory, imported_offset) = import_host_memory(ptr, size); - auto external_mem_buffer_create_info = VkExternalMemoryBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, - .pNext = nullptr, - .handleTypes = imported_host_memory_handle_type - }; - auto tmp_buffer_create_info = VkBufferCreateInfo { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = &external_mem_buffer_create_info, - .flags = 0, - .size = (VkDeviceSize) size, - .usage = usage_flags, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - VkBuffer buffer; - vkCreateBuffer(device, &tmp_buffer_create_info, nullptr, &buffer); - vkBindBufferMemory(device, buffer, imported_memory, imported_offset); - return std::make_pair(buffer, imported_memory); -} - VkCommandBuffer VulkanPlatform::Device::obtain_command_buffer() { if (spare_cmd_bufs.size() > 0) { VkCommandBuffer cmd_buf = spare_cmd_bufs.back(); @@ -569,7 +511,7 @@ VkCommandBuffer VulkanPlatform::Device::obtain_command_buffer() { .commandBufferCount = 1 }; VkCommandBuffer cmd_buf; - CHECK(vkAllocateCommandBuffers(device, &cmd_buf_create_info, &cmd_buf)); + CHECK(vkAllocateCommandBuffers(handle_, &cmd_buf_create_info, &cmd_buf)); return cmd_buf; } @@ -601,7 +543,7 @@ void VulkanPlatform::Device::execute_command_buffer_oneshot(std::functionfind_buffer_by_device_address((uint64_t) dst); - auto dst_buffer = dst_buffer_resource->buffer; + auto dst_buffer = device->get_buffer_by_device_address(reinterpret_cast(dst)); - VkBuffer tmp_buffer; - VkDeviceMemory memory; + std::unique_ptr tmp_buffer; void* host_ptr = (void*)((size_t)src + offset_src); - if (device->can_import_host_memory) { // Import host memory and wrap it in a buffer - std::tie(tmp_buffer, memory) = device->import_host_memory_as_buffer(host_ptr, size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + if (device->can_import_host_memory) { + tmp_buffer = std::make_unique(*device, size, Buffer::ImportedHostMemory { host_ptr }, VK_BUFFER_USAGE_TRANSFER_DST_BIT); } else { - std::tie(tmp_buffer, memory) = device->allocate_buffer(size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, Device::AllocHeap::HOST_VISIBLE); + tmp_buffer = std::make_unique(*device, size, Buffer::HostMemory { }, VK_BUFFER_USAGE_TRANSFER_DST_BIT); void* mapped = nullptr; - CHECK(vkMapMemory(device->device, memory, 0, size, 0, &mapped)); + CHECK(vkMapMemory(device->handle_, tmp_buffer->device_memory_, 0, size, 0, &mapped)); assert(mapped != nullptr); memcpy(mapped, host_ptr, size); - vkUnmapMemory(device->device, memory); + vkUnmapMemory(device->handle_, tmp_buffer->device_memory_); } device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { @@ -636,28 +576,22 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI .dstOffset = (VkDeviceSize) offset_dst, .size = (VkDeviceSize) size, }; - vkCmdCopyBuffer(cmd_buf, tmp_buffer, dst_buffer, 1, ©_region); + vkCmdCopyBuffer(cmd_buf, tmp_buffer->handle_, dst_buffer->handle_, 1, ©_region); }); - - // Cleanup - vkFreeMemory(device->device, memory, nullptr); - vkDestroyBuffer(device->device, tmp_buffer, nullptr); } void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { auto& device = usable_devices[dev_src]; - auto src_buffer_resource = device->find_buffer_by_device_address((uint64_t) src); - auto src_buffer = src_buffer_resource->buffer; + auto src_buffer = device->get_buffer_by_device_address(reinterpret_cast(src)); - VkBuffer tmp_buffer; - VkDeviceMemory memory; + std::unique_ptr tmp_buffer; void* host_ptr = (void*)((size_t)dst + offset_dst); - if (device->can_import_host_memory) { // Import host memory and wrap it in a buffer - std::tie(tmp_buffer, memory) = device->import_host_memory_as_buffer(host_ptr, size, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + if (device->can_import_host_memory) { + tmp_buffer = std::make_unique(*device, size, Buffer::ImportedHostMemory { host_ptr }, VK_BUFFER_USAGE_TRANSFER_DST_BIT); } else { - std::tie(tmp_buffer, memory) = device->allocate_buffer(size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, Device::AllocHeap::HOST_VISIBLE); + tmp_buffer = std::make_unique(*device, size, Buffer::HostMemory { }, VK_BUFFER_USAGE_TRANSFER_DST_BIT); } device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { @@ -666,20 +600,16 @@ void VulkanPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t off .dstOffset = 0, .size = (VkDeviceSize) size, }; - vkCmdCopyBuffer(cmd_buf, src_buffer, tmp_buffer, 1, ©_region); + vkCmdCopyBuffer(cmd_buf, src_buffer->handle_, tmp_buffer->handle_, 1, ©_region); }); if (!device->can_import_host_memory) { void* mapped = nullptr; - CHECK(vkMapMemory(device->device, memory, 0, size, 0, &mapped)); + CHECK(vkMapMemory(device->handle_, tmp_buffer->device_memory_, 0, size, 0, &mapped)); assert(mapped != nullptr); memcpy(host_ptr, mapped, size); - vkUnmapMemory(device->device, memory); + vkUnmapMemory(device->handle_, tmp_buffer->device_memory_); } - - // Cleanup - vkFreeMemory(device->device, memory, nullptr); - vkDestroyBuffer(device->device, tmp_buffer, nullptr); } const char *VulkanPlatform::device_name(DeviceId dev) const { @@ -690,16 +620,8 @@ void register_vulkan_platform(Runtime* runtime) { runtime->register_platform(); } -VulkanPlatform::Resource::~Resource() { - vkFreeMemory(device.device, alloc, nullptr); -} - -VulkanPlatform::Buffer::~Buffer() { - vkDestroyBuffer(device.device, buffer, nullptr); -} - VulkanPlatform::Kernel::~Kernel() { - vkDestroyPipeline(device.device, pipeline, nullptr); - vkDestroyPipelineLayout(device.device, layout, nullptr); - vkDestroyShaderModule(device.device, shader_module, nullptr); + vkDestroyPipeline(device.handle_, pipeline, nullptr); + vkDestroyPipelineLayout(device.handle_, layout, nullptr); + vkDestroyShaderModule(device.handle_, shader_module, nullptr); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index a4e5f110..50015302 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -5,6 +5,7 @@ #include #include +#include /// Vulkan requires you to manually load certain function pointers, we use a macro to automate the boilerplate #define DevicesExtensionsFunctions(f) \ @@ -20,21 +21,15 @@ class VulkanPlatform : public Platform { void *alloc(DeviceId dev, int64_t size) override; void *alloc_host(DeviceId dev, int64_t size) override; void *alloc_unified(DeviceId dev, int64_t size) override { command_unavailable("alloc_unified"); } - void *get_device_ptr(DeviceId dev, void *ptr) override; - void release(DeviceId dev, void *ptr) override; - void release_host(DeviceId dev, void *ptr) override; void launch_kernel(DeviceId dev, const LaunchParams &launch_params) override; - void synchronize(DeviceId dev) override; void copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) override; - void copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) override; - void copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) override; size_t dev_count() const override { return usable_devices.size(); } @@ -46,21 +41,40 @@ class VulkanPlatform : public Platform { struct Device; struct Resource { - //public: - Device& device; - size_t id; - VkDeviceMemory alloc; + Device& device_; - Resource(Device& device) : device(device) {} - virtual ~Resource(); + Resource(Device& device) : device_(device) {} + virtual ~Resource() {}; }; struct Buffer : public Resource { - VkBuffer buffer; - uint64_t bda = -1; - size_t mapped_host_address = 0; + VkBuffer handle_; + + void* host_address_ = nullptr; + VkDeviceAddress device_address_ = 0; + + VkDeviceMemory device_memory_; + + const static VkBufferUsageFlags2 ALL_BUFFER_USAGE = + VK_BUFFER_USAGE_2_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_2_TRANSFER_DST_BIT; + + + struct ImportedHostMemory { + void* host_memory_; + }; + + struct DeviceMemory {}; + struct HostMemory {}; + struct UnifiedMemory {}; - Buffer(Device& device) : Resource(device) {} + using BackingStorage = std::variant; + friend Device; + friend Platform; + + Buffer(Device& device, size_t size, BackingStorage backing_storage, VkBufferUsageFlags2 usages = ALL_BUFFER_USAGE); ~Buffer() override; }; @@ -82,41 +96,46 @@ class VulkanPlatform : public Platform { }; struct Device { - enum class AllocHeap { - DEVICE_LOCAL, - HOST_VISIBLE - }; - VulkanPlatform& platform; VkPhysicalDevice physical_device; - VkPhysicalDeviceProperties2 properties; + VkDevice handle_ = nullptr; size_t device_id; - VkDevice device = nullptr; - size_t min_imported_host_ptr_alignment; + ExtensionFns extension_fns; + + VkPhysicalDeviceProperties2 properties = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + }; + bool can_import_host_memory = false; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_properties { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + .pNext = nullptr, + .minImportedHostPointerAlignment = 0xFFFFFFFF, + }; + + std::unordered_map> buffers_; + std::unordered_map> kernels; - std::vector> resources; - size_t next_resource_id = 1; // resource id 0 is reserved VkQueue queue; VkCommandPool cmd_pool; std::vector spare_cmd_bufs; - std::unordered_map> kernels; - ExtensionFns extension_fns; Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id); ~Device(); - uint32_t find_suitable_memory_type(uint32_t memory_type_bits, AllocHeap); - + uint32_t find_suitable_memory_type(uint32_t memory_type_bits, VkMemoryPropertyFlags, VkMemoryHeapFlags = 0); + VkDeviceMemory allocate_memory(VkDeviceSize, uint32_t memory_type_bits, VkMemoryPropertyFlags memory_flags, VkMemoryHeapFlags heap_flags = 0); std::pair import_host_memory(void* ptr, size_t size); - std::pair import_host_memory_as_buffer(void* ptr, size_t size, VkBufferUsageFlags usage_flags); - std::pair allocate_buffer(int64_t, VkBufferUsageFlags usage_flags, AllocHeap); - Resource* find_resource_by_id(size_t id); - Buffer* create_buffer_resource(int64_t, VkBufferUsageFlags usage_flags, AllocHeap); - Buffer* find_buffer_by_device_address(uint64_t bda); - Buffer* find_buffer_by_host_address(size_t host_address); + Buffer* get_buffer_by_device_address(VkDeviceAddress addr) { + auto found = buffers_.find(addr); + if (found != buffers_.end()) + return &*found->second; + return nullptr; + } + + uint64_t create_buffer_resource(size_t, Buffer::BackingStorage backing, VkBufferUsageFlags usage_flags); VkCommandBuffer obtain_command_buffer(); void return_command_buffer(VkCommandBuffer cmd_buf); From 1e142cd3330f277b29d68cbfe7ca5540e5b2e780 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 17 Oct 2025 17:33:30 +0200 Subject: [PATCH 37/43] vulkan: fix buffers --- src/vulkan_platform.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index 2c3b1a7c..b7e86934 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -282,10 +282,9 @@ VulkanPlatform::Buffer::Buffer(Device& device, size_t size, BackingStorage backi insert_pnext(buffer_create_info, external_mem_buffer_create_info); create_buffer(); - VkDeviceMemory imported_memory; size_t imported_offset; - std::tie(imported_memory, imported_offset) = device.import_host_memory(import_host->host_memory_, size); - vkBindBufferMemory(device.handle_, handle_, imported_memory, imported_offset); + std::tie(device_memory_, imported_offset) = device.import_host_memory(import_host->host_memory_, size); + vkBindBufferMemory(device.handle_, handle_, device_memory_, imported_offset); } else if (std::get_if(&backing)) { create_buffer(); VkMemoryRequirements memory_requirements; @@ -560,9 +559,9 @@ void VulkanPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceI void* host_ptr = (void*)((size_t)src + offset_src); // Import host memory and wrap it in a buffer if (device->can_import_host_memory) { - tmp_buffer = std::make_unique(*device, size, Buffer::ImportedHostMemory { host_ptr }, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + tmp_buffer = std::make_unique(*device, size, Buffer::ImportedHostMemory { host_ptr }, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); } else { - tmp_buffer = std::make_unique(*device, size, Buffer::HostMemory { }, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + tmp_buffer = std::make_unique(*device, size, Buffer::HostMemory { }, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); void* mapped = nullptr; CHECK(vkMapMemory(device->handle_, tmp_buffer->device_memory_, 0, size, 0, &mapped)); assert(mapped != nullptr); From 924df20dd247cfe6a4b29f64d73d689d219714b4 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 17 Oct 2025 17:37:30 +0200 Subject: [PATCH 38/43] vulkan: moved memory import code --- src/vulkan_platform.cpp | 86 ++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index b7e86934..d8209df6 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -259,6 +259,48 @@ VkDeviceMemory VulkanPlatform::Device::allocate_memory(VkDeviceSize size, uint32 return memory; } +std::pair VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { + assert(can_import_host_memory && "This device does not support importing host memory"); + + size_t alignment = external_memory_host_properties.minImportedHostPointerAlignment; + + // Align stuff + size_t mask = ~(alignment - 1); + size_t host_ptr = (size_t)ptr; + size_t aligned_host_ptr = host_ptr & mask; + + size_t end = host_ptr + size; + size_t aligned_end = ((end + alignment - 1) / alignment) * alignment; + size_t aligned_size = aligned_end - aligned_host_ptr; + + // where the memory we wanted to import will actually start + size_t offset = host_ptr - aligned_host_ptr; + + // Find the corresponding device memory type index + VkMemoryHostPointerPropertiesEXT host_ptr_properties { + .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + CHECK(extension_fns.vkGetMemoryHostPointerPropertiesEXT(handle_, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, (void*)aligned_host_ptr, &host_ptr_properties)); + uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + + // Import memory + auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .pNext = nullptr, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pHostPointer = (void*) aligned_host_ptr, + }; + auto allocation_info = VkMemoryAllocateInfo { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &import_ptr_info, + .allocationSize = (VkDeviceSize) aligned_size, + .memoryTypeIndex = memory_type + }; + VkDeviceMemory imported_memory; + CHECK(vkAllocateMemory(handle_, &allocation_info, nullptr, &imported_memory)); + return std::make_pair(imported_memory, offset); +} + VulkanPlatform::Buffer::Buffer(Device& device, size_t size, BackingStorage backing, VkBufferUsageFlags2 usage) : Resource(device) { VkBufferCreateInfo buffer_create_info { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -452,50 +494,6 @@ void VulkanPlatform::synchronize(DeviceId dev) { // TODO: don't wait for idle everywhere } -VkExternalMemoryHandleTypeFlagBits imported_host_memory_handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; - -std::pair VulkanPlatform::Device::import_host_memory(void *ptr, size_t size) { - assert(can_import_host_memory && "This device does not support importing host memory"); - - size_t alignment = external_memory_host_properties.minImportedHostPointerAlignment; - - // Align stuff - size_t mask = ~(alignment - 1); - size_t host_ptr = (size_t)ptr; - size_t aligned_host_ptr = host_ptr & mask; - - size_t end = host_ptr + size; - size_t aligned_end = ((end + alignment - 1) / alignment) * alignment; - size_t aligned_size = aligned_end - aligned_host_ptr; - - // where the memory we wanted to import will actually start - size_t offset = host_ptr - aligned_host_ptr; - - // Find the corresponding device memory type index - VkMemoryHostPointerPropertiesEXT host_ptr_properties { - .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, - }; - CHECK(extension_fns.vkGetMemoryHostPointerPropertiesEXT(handle_, imported_host_memory_handle_type, (void*)aligned_host_ptr, &host_ptr_properties)); - uint32_t memory_type = find_suitable_memory_type(host_ptr_properties.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - - // Import memory - auto import_ptr_info = VkImportMemoryHostPointerInfoEXT { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, - .pNext = nullptr, - .handleType = imported_host_memory_handle_type, - .pHostPointer = (void*) aligned_host_ptr, - }; - auto allocation_info = VkMemoryAllocateInfo { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &import_ptr_info, - .allocationSize = (VkDeviceSize) aligned_size, - .memoryTypeIndex = memory_type - }; - VkDeviceMemory imported_memory; - CHECK(vkAllocateMemory(handle_, &allocation_info, nullptr, &imported_memory)); - return std::make_pair(imported_memory, offset); -} - VkCommandBuffer VulkanPlatform::Device::obtain_command_buffer() { if (spare_cmd_bufs.size() > 0) { VkCommandBuffer cmd_buf = spare_cmd_bufs.back(); From 803ed922727ff11a37e4c396a5ba0d0abc55386f Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 17 Oct 2025 17:58:36 +0200 Subject: [PATCH 39/43] vulkan: start drafting non-invasive shady runtime integration --- src/CMakeLists.txt | 2 +- src/vulkan_platform.cpp | 22 ++++++++++++++++------ src/vulkan_platform.h | 20 +++++++++++++++++--- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2c7d90ce..278f5d30 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -155,7 +155,7 @@ if(Vulkan_FOUND) find_package(shady REQUIRED) add_library(runtime_vulkan STATIC vulkan_platform.cpp vulkan_platform.h) target_include_directories(runtime_vulkan PRIVATE ${Vulkan_INCLUDE_DIRS}) - target_link_libraries(runtime_vulkan PRIVATE runtime_base ${Vulkan_LIBRARIES}) + target_link_libraries(runtime_vulkan PRIVATE runtime_base ${Vulkan_LIBRARIES} shady::runtime shady::api shady::driver) list(APPEND RUNTIME_PLATFORMS runtime_vulkan) endif() set(AnyDSL_runtime_HAS_Vulkan_SUPPORT ${Vulkan_FOUND} CACHE INTERNAL "enables Vulkan support") diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index d8209df6..b696a0e7 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -99,7 +99,7 @@ VulkanPlatform::~VulkanPlatform() { } VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physical_device, size_t device_id) -: platform(platform), physical_device(physical_device), device_id(device_id) { +: platform_(platform), physical_device(physical_device), device_id(device_id) { uint32_t exts_count; vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &exts_count, nullptr); std::vector available_device_extensions(exts_count); @@ -402,13 +402,23 @@ VulkanPlatform::Buffer::~Buffer() { vkDestroyBuffer(device_.handle_, handle_, nullptr); } +VulkanPlatform::Kernel::Kernel(Device& device, std::string file_name) : device_(device) { + std::string program_src = device_.platform_.runtime_->load_file(file_name); + shd_driver_load_source_file(&device_.platform_.compiler_config_, &device_.target_config_, shady::SrcSPIRV, program_src.size(), program_src.c_str(), "test", &shady_module_); + + shady::DriverConfig config = shady::shd_default_driver_config(); + //shady::shd_driver_compile() + //handle_ = shd_rn_new_program_from_module(device_.platform_.runner_, &device_.platform_.compiler_config_, shady_module_); +} + + VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { auto ki = kernels.find(filename); if (ki == kernels.end()) { - auto [i,b] = kernels.emplace(filename, std::make_unique(*this)); + auto [i,b] = kernels.emplace(filename, std::make_unique(*this, filename)); Kernel* kernel = i->second.get(); - std::string bin = platform.runtime_->load_file(filename); + std::string bin = platform_.runtime_->load_file(filename); auto shader_module_create_info = VkShaderModuleCreateInfo { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, @@ -618,7 +628,7 @@ void register_vulkan_platform(Runtime* runtime) { } VulkanPlatform::Kernel::~Kernel() { - vkDestroyPipeline(device.handle_, pipeline, nullptr); - vkDestroyPipelineLayout(device.handle_, layout, nullptr); - vkDestroyShaderModule(device.handle_, shader_module, nullptr); + vkDestroyPipeline(device_.handle_, pipeline, nullptr); + vkDestroyPipelineLayout(device_.handle_, layout, nullptr); + vkDestroyShaderModule(device_.handle_, shader_module, nullptr); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 50015302..8259ce0b 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -4,6 +4,13 @@ #include "platform.h" #include +namespace shady { +extern "C" { +#include "shady/runtime/vulkan.h" +#include "shady/driver.h" +} +} + #include #include @@ -79,13 +86,15 @@ class VulkanPlatform : public Platform { }; struct Kernel { - Device& device; + Device& device_; + + shady::Module* shady_module_; VkShaderModule shader_module; VkPipelineLayout layout; VkPipeline pipeline; - Kernel(Device& device) : device(device) {} + Kernel(Device& device, std::string); ~Kernel(); }; @@ -96,7 +105,7 @@ class VulkanPlatform : public Platform { }; struct Device { - VulkanPlatform& platform; + VulkanPlatform& platform_; VkPhysicalDevice physical_device; VkDevice handle_ = nullptr; size_t device_id; @@ -114,6 +123,9 @@ class VulkanPlatform : public Platform { .minImportedHostPointerAlignment = 0xFFFFFFFF, }; + shady::ShadyVkrPhysicalDeviceCaps shady_caps_; + shady::TargetConfig target_config_; + std::unordered_map> buffers_; std::unordered_map> kernels; @@ -147,6 +159,8 @@ class VulkanPlatform : public Platform { VkInstance instance; std::vector physical_devices; std::vector> usable_devices; + + shady::CompilerConfig compiler_config_ = shady::shd_default_compiler_config(); }; #endif From fa2fc380376fcda48027b5d6f8a097f45901d774 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 22 Oct 2025 15:59:50 +0200 Subject: [PATCH 40/43] runtime: ask shady to JIT the code --- src/vulkan_platform.cpp | 193 ++++++++++++++++++++++++---------------- src/vulkan_platform.h | 8 +- 2 files changed, 121 insertions(+), 80 deletions(-) diff --git a/src/vulkan_platform.cpp b/src/vulkan_platform.cpp index b696a0e7..4f97ee10 100644 --- a/src/vulkan_platform.cpp +++ b/src/vulkan_platform.cpp @@ -1,5 +1,12 @@ #include "vulkan_platform.h" +namespace shady { +extern "C" { +#include "shady/jit/vulkan.h" +#include "shady/be/spirv.h" +} +} + const auto khr_validation = "VK_LAYER_KHRONOS_validation"; #define CHECK(stuff) { \ @@ -212,6 +219,10 @@ VulkanPlatform::Device::Device(VulkanPlatform& platform, VkPhysicalDevice physic #define f(s) extension_fns.s = (PFN_##s) vkGetDeviceProcAddr(handle_, #s); DevicesExtensionsFunctions(f) #undef f + + bool device_ok = shady::shd_rt_vk_check_physical_device_suitability(physical_device, &shady_caps_); + assert(device_ok); + target_config_ = shady::shd_rt_vk_get_device_target_config(&platform_.compiler_config_, &shady_caps_); } VulkanPlatform::Device::~Device() { @@ -402,101 +413,129 @@ VulkanPlatform::Buffer::~Buffer() { vkDestroyBuffer(device_.handle_, handle_, nullptr); } -VulkanPlatform::Kernel::Kernel(Device& device, std::string file_name) : device_(device) { +VulkanPlatform::Kernel::Kernel(Device& device, std::string file_name, std::string kernel_name) : device_(device) { + shady::TargetConfig specialized_target = device_.target_config_; + specialized_target.execution_model = shady::ShdExecutionModelCompute; + specialized_target.entry_point = kernel_name.c_str(); + std::string program_src = device_.platform_.runtime_->load_file(file_name); shd_driver_load_source_file(&device_.platform_.compiler_config_, &device_.target_config_, shady::SrcSPIRV, program_src.size(), program_src.c_str(), "test", &shady_module_); + // TODO: this will be removed in a future version of Shady + shady::CompilerConfig specialized_config = device_.platform_.compiler_config_; + shady::SPVBackendConfig backend_config; + shady::shd_jit_vk_get_compiler_config_for_device(&device_.shady_caps_, &device_.target_config_, &backend_config, &specialized_config); + shady::shd_jit_vk_compile_module(&shady_module_, &specialized_target, &backend_config, &specialized_config); + size_t spirv_size; + char* spirv_bytes; + shady::shd_emit_spirv(&specialized_config, &backend_config, shady_module_, &spirv_size, &spirv_bytes); + + size_t interface_size; + shady::shd_rt_vk_get_module_interface(shady_module_, &interface_size, nullptr); + interface.resize(interface_size); + shady::shd_rt_vk_get_module_interface(shady_module_, &interface_size, interface.data()); + + for (auto& e : interface) { + if (e.dst_kind == shady::RuntimeInterfaceItem::SHD_RII_Dst_PushConstant) + push_constant_size = std::max(push_constant_size, e.dst_details.push_constant.offset + e.dst_details.push_constant.size); + } - shady::DriverConfig config = shady::shd_default_driver_config(); - //shady::shd_driver_compile() - //handle_ = shd_rn_new_program_from_module(device_.platform_.runner_, &device_.platform_.compiler_config_, shady_module_); -} + auto shader_module_create_info = VkShaderModuleCreateInfo { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = spirv_size, + .pCode = reinterpret_cast(spirv_bytes), + }; + CHECK(vkCreateShaderModule(device.handle_, &shader_module_create_info, nullptr, &shader_module)); + auto stage = VkPipelineShaderStageCreateInfo { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = shader_module, + .pName = kernel_name.c_str(), + .pSpecializationInfo = nullptr, + }; -VulkanPlatform::Kernel *VulkanPlatform::Device::load_kernel(const std::string& filename) { - auto ki = kernels.find(filename); - if (ki == kernels.end()) { - auto [i,b] = kernels.emplace(filename, std::make_unique(*this, filename)); - Kernel* kernel = i->second.get(); + std::vector push_constants { + VkPushConstantRange { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast(push_constant_size) + } + }; + auto layout_create_info = VkPipelineLayoutCreateInfo { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 0, + .pSetLayouts = nullptr, + .pushConstantRangeCount = (uint32_t) push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }; + CHECK(vkCreatePipelineLayout(device.handle_, &layout_create_info, nullptr, &layout)); - std::string bin = platform_.runtime_->load_file(filename); - auto shader_module_create_info = VkShaderModuleCreateInfo { - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = bin.size(), - .pCode = reinterpret_cast(bin.c_str()), - }; - CHECK(vkCreateShaderModule(handle_, &shader_module_create_info, nullptr, &kernel->shader_module)); + auto compute_pipeline_create_info = VkComputePipelineCreateInfo { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = stage, + .layout = layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }; + CHECK(vkCreateComputePipelines(device.handle_, nullptr, 1, &compute_pipeline_create_info, nullptr, &pipeline)); +} - auto stage = VkPipelineShaderStageCreateInfo { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = kernel->shader_module, - .pName = "kernel_main", - .pSpecializationInfo = nullptr, - }; +VulkanPlatform::Kernel* VulkanPlatform::Device::load_kernel(const std::string& filename, const std::string& kernel_name) { + auto key = filename + "::" + kernel_name; + auto ki = kernels.find(key); + if (ki == kernels.end()) { + auto [i,b] = kernels.emplace(key, std::make_unique(*this, filename, kernel_name)); + return &*i->second; + } - std::vector push_constants { - VkPushConstantRange { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = 128 + return ki->second.get(); +} + +void VulkanPlatform::Kernel::setup(VkCommandBuffer cmdbuf, const LaunchParams& launch_params) { + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + std::vector push_constants; + push_constants.resize(push_constant_size); + + for (auto& e : interface) { + if (e.dst_kind == shady::RuntimeInterfaceItem::SHD_RII_Dst_PushConstant) { + switch (e.src_kind) { + case shady::RuntimeInterfaceItem::SHD_RII_Src_Param: + assert(e.dst_details.push_constant.size == launch_params.args.sizes[e.src_details.param.param_idx]); + memcpy(reinterpret_cast(push_constants.data()) + e.dst_details.push_constant.offset, launch_params.args.data[e.src_details.param.param_idx], e.dst_details.push_constant.size); + break; + default: + error("TODO"); + //case shady::RuntimeInterfaceItem::SHD_RII_Src_TmpAllocation: + // break; + //case shady::RuntimeInterfaceItem::SHD_RII_Src_LiftedConstant: + // break; + //case shady::RuntimeInterfaceItem::SHD_RII_Src_ScratchBuffer: + // break; } - }; - auto layout_create_info = VkPipelineLayoutCreateInfo { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 0, - .pSetLayouts = nullptr, - .pushConstantRangeCount = (uint32_t) push_constants.size(), - .pPushConstantRanges = push_constants.data(), - }; - CHECK(vkCreatePipelineLayout(handle_, &layout_create_info, nullptr, &kernel-> layout)); - auto compute_pipeline_create_info = VkComputePipelineCreateInfo { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = stage, - .layout = kernel->layout, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }; - CHECK(vkCreateComputePipelines(handle_, nullptr, 1, &compute_pipeline_create_info, nullptr, &kernel->pipeline)); - return kernel; + } else { + error("todo: implement descriptors"); + } } - return ki->second.get(); + vkCmdPushConstants(cmdbuf, layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size, push_constants.data()); + vkCmdDispatch(cmdbuf, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2]); } void VulkanPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { auto& device = usable_devices[dev]; - auto kernel = device->load_kernel(launch_params.file_name); + auto kernel = device->load_kernel(launch_params.file_name, launch_params.kernel_name); device->execute_command_buffer_oneshot([&](VkCommandBuffer cmd_buf) { - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, kernel->pipeline); - std::array push_constants {}; - size_t offset = 0; - //for (uint32_t arg = 0; arg < launch_params.num_args; arg++) { - // if (launch_params.args.types[arg] == KernelArgType::Val) { - // assert(launch_params.args.sizes[arg] == 4 && "Preliminary support..."); - // memcpy(push_constants.data() + offset, launch_params.args.data[arg], 4); - // offset += 4; - // } else if (launch_params.args.types[arg] == KernelArgType::Ptr) { - // void* buffer = *(void**)launch_params.args.data[arg]; - // auto dst_buffer_resource = (Buffer*) device->find_buffer_by_device_address((uint64_t) buffer); - // uint64_t buffer_bda = dst_buffer_resource->bda; - // memcpy(push_constants.data() + offset, &buffer_bda, 8); - // offset += 8; - // } else { - // assert(false && "no struct support yet"); - // } - //} - vkCmdPushConstants(cmd_buf, kernel->layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128, &push_constants); - vkCmdDispatch(cmd_buf, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2]); + kernel->setup(cmd_buf, launch_params); }); } diff --git a/src/vulkan_platform.h b/src/vulkan_platform.h index 8259ce0b..38f3f615 100644 --- a/src/vulkan_platform.h +++ b/src/vulkan_platform.h @@ -7,7 +7,6 @@ namespace shady { extern "C" { #include "shady/runtime/vulkan.h" -#include "shady/driver.h" } } @@ -89,12 +88,15 @@ class VulkanPlatform : public Platform { Device& device_; shady::Module* shady_module_; + std::vector interface; + size_t push_constant_size = 0; VkShaderModule shader_module; VkPipelineLayout layout; VkPipeline pipeline; - Kernel(Device& device, std::string); + Kernel(Device& device, std::string, std::string); + void setup(VkCommandBuffer, const LaunchParams &launch_params); ~Kernel(); }; @@ -153,7 +155,7 @@ class VulkanPlatform : public Platform { void return_command_buffer(VkCommandBuffer cmd_buf); void execute_command_buffer_oneshot(std::function fn); - Kernel* load_kernel(const std::string&); + Kernel* load_kernel(const std::string&, const std::string&); }; VkInstance instance; From 0d2c9beb1b7b6e6729dfe388c15a5405908f611e Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 22 Oct 2025 16:53:22 +0200 Subject: [PATCH 41/43] remove shady runner support --- src/CMakeLists.txt | 13 +--- src/anydsl_runtime.cpp | 1 - src/platform.h | 1 - src/runtime.cpp | 3 - src/shady_platform.cpp | 135 ----------------------------------------- src/shady_platform.h | 50 --------------- 6 files changed, 1 insertion(+), 202 deletions(-) delete mode 100644 src/shady_platform.cpp delete mode 100644 src/shady_platform.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 278f5d30..3c9fb157 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -139,20 +139,9 @@ if(pal_FOUND) endif() set(AnyDSL_runtime_HAS_PAL_SUPPORT ${pal_FOUND} CACHE INTERNAL "enables PAL support") -# look for shady find_package(shady) -if (shady_FOUND) - find_package(Vulkan REQUIRED) - message("It's shading time") - add_library(runtime_shady STATIC shady_platform.cpp shady_platform.h) - target_link_libraries(runtime_shady PUBLIC shady::runtime shady::api) - list(APPEND RUNTIME_PLATFORMS runtime_shady) -endif() -set(AnyDSL_runtime_HAS_SHADY_SUPPORT ${shady_FOUND} CACHE INTERNAL "enables Shady support") - find_package(Vulkan) -if(Vulkan_FOUND) - find_package(shady REQUIRED) +if(Vulkan_FOUND AND shady_FOUND) add_library(runtime_vulkan STATIC vulkan_platform.cpp vulkan_platform.h) target_include_directories(runtime_vulkan PRIVATE ${Vulkan_INCLUDE_DIRS}) target_link_libraries(runtime_vulkan PRIVATE runtime_base ${Vulkan_LIBRARIES} shady::runtime shady::api shady::driver) diff --git a/src/anydsl_runtime.cpp b/src/anydsl_runtime.cpp index 9c89a6b4..ea2b5a80 100644 --- a/src/anydsl_runtime.cpp +++ b/src/anydsl_runtime.cpp @@ -37,7 +37,6 @@ struct RuntimeSingleton { register_hsa_platform(&runtime); register_pal_platform(&runtime); register_levelzero_platform(&runtime); - register_shady_platform(&runtime); register_vulkan_platform(&runtime); } diff --git a/src/platform.h b/src/platform.h index 546bfc13..34f7be47 100644 --- a/src/platform.h +++ b/src/platform.h @@ -14,7 +14,6 @@ void register_opencl_platform(Runtime*); void register_hsa_platform(Runtime*); void register_pal_platform(Runtime*); void register_levelzero_platform(Runtime*); -void register_shady_platform(Runtime*); void register_vulkan_platform(Runtime*); /// A runtime platform. Exposes a set of devices, a copy function, diff --git a/src/runtime.cpp b/src/runtime.cpp index 8347c212..bdb4ce22 100644 --- a/src/runtime.cpp +++ b/src/runtime.cpp @@ -23,9 +23,6 @@ void register_pal_platform(Runtime* runtime) { runtime->register_platformregister_platform("Level Zero"); } #endif -#ifndef AnyDSL_runtime_HAS_SHADY_SUPPORT -void register_shady_platform(Runtime* runtime) { runtime->register_platform("Shady"); } -#endif #ifndef AnyDSL_runtime_HAS_Vulkan_SUPPORT void register_vulkan_platform(Runtime* runtime) { runtime->register_platform("Vulkan"); } #endif diff --git a/src/shady_platform.cpp b/src/shady_platform.cpp deleted file mode 100644 index de29824b..00000000 --- a/src/shady_platform.cpp +++ /dev/null @@ -1,135 +0,0 @@ -#include "shady_platform.h" - -using namespace shady; - -struct ShadyBuffer { - ShadyPlatform::ShadyDevice& device_; - shady::Buffer* handle_; - size_t size_; - - ShadyBuffer(ShadyPlatform::ShadyDevice& device, size_t size); -}; - -struct ShadyProgram { - ShadyPlatform::ShadyDevice& device_; - shady::Module* module_; - shady::Program* handle_; - - ShadyProgram(ShadyPlatform::ShadyDevice&, std::string); -}; - -struct ShadyPlatform::ShadyDevice { - ShadyPlatform& platform_; - DeviceId id_; - shady::Device* handle_; - shady::TargetConfig target_config_; - - std::unordered_map> buffers_; - std::unordered_map> programs_; - - ShadyDevice(ShadyPlatform& platform, DeviceId id) : platform_(platform), id_(id) { - handle_ = shd_rn_get_device(platform.runner_, id); - target_config_ = shd_rn_get_device_target_config(&platform_.compiler_config_, handle_); - } - - ShadyProgram& load_program(std::string filename); -}; - -ShadyBuffer::ShadyBuffer(ShadyPlatform::ShadyDevice& device, size_t size) : device_(device), size_(size) { - handle_ = shady::shd_rn_allocate_buffer_device(device_.handle_, size); -} - -ShadyPlatform::ShadyPlatform(Runtime *r) : Platform(r) { - shady::RunnerConfig cfg; - cfg.dump_spv = true; - cfg.use_validation = true; - - compiler_config_.dynamic_scheduling = false; - - runner_ = shady::shd_rn_initialize(cfg); - for (size_t i = 0; i < shd_rn_device_count(runner_); i++) { - devices_.emplace_back(std::make_unique(*this, (DeviceId) i)); - } -} - -ShadyPlatform::~ShadyPlatform() { - shd_rn_shutdown(runner_); -} - -void* ShadyPlatform::alloc(DeviceId dev, int64_t size) { - auto& device = devices_[dev]; - auto buffer = std::make_unique(*device, (size_t) size); - uint64_t device_address = shd_rn_get_buffer_device_pointer(buffer->handle_); - device->buffers_[device_address] = std::move(buffer); - return reinterpret_cast(device_address); -} - -void* ShadyPlatform::alloc_host(DeviceId dev, int64_t size) { - assert(false); -} - -void* ShadyPlatform::alloc_unified(DeviceId dev, int64_t size) { - assert(false); -} - -void* ShadyPlatform::get_device_ptr(DeviceId dev, void *ptr) { - assert(false); -} - -void ShadyPlatform::release(DeviceId dev, void *ptr) { - auto& device = devices_[dev]; - device->buffers_.erase((uint64_t) ptr); -} - -void ShadyPlatform::release_host(DeviceId dev, void *ptr) { - assert(false); -} - -ShadyProgram& ShadyPlatform::ShadyDevice::load_program(std::string filename) { - if (auto found = programs_.find(filename); found != programs_.end()) - return *found->second; - return *(programs_[filename] = std::make_unique(*this, filename)); -} - -ShadyProgram::ShadyProgram(ShadyPlatform::ShadyDevice& device, std::string file_name) : device_(device) { - std::string program_src = device_.platform_.runtime_->load_file(file_name); - shd_driver_load_source_file(&device_.platform_.compiler_config_, &device_.target_config_, SrcSPIRV, program_src.size(), program_src.c_str(), "test", &module_); - handle_ = shd_rn_new_program_from_module(device_.platform_.runner_, &device_.platform_.compiler_config_, module_); -} - -void ShadyPlatform::launch_kernel(DeviceId dev, const LaunchParams &launch_params) { - auto& device = devices_[dev]; - auto& program = device->load_program(launch_params.file_name); - - std::vector args; - for (uint32_t argIdx = 0; argIdx < launch_params.num_args; ++argIdx) { - args.push_back(launch_params.args.data[argIdx]); - //WRAP_LEVEL_ZERO(zeKernelSetArgumentValue(hKernel, argIdx, launch_params.args.sizes[argIdx], launch_params.args.data[argIdx])); - } - - shady::Command* d = shady::shd_rn_launch_kernel(program.handle_, device->handle_, launch_params.kernel_name, launch_params.grid[0] / launch_params.block[0], launch_params.grid[1] / launch_params.block[1], launch_params.grid[2] / launch_params.block[2], args.size(), args.data(), nullptr); - assert(d); - shady::shd_rn_wait_completion(d); -} - -void ShadyPlatform::synchronize(DeviceId dev) {} - -void ShadyPlatform::copy(DeviceId dev_src, const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { - assert(false); -} - -void ShadyPlatform::copy_from_host(const void *src, int64_t offset_src, DeviceId dev_dst, void *dst, int64_t offset_dst, int64_t size) { - auto& dst_device = devices_[dev_dst]; - auto& dst_buffer = dst_device->buffers_[(uint64_t) dst]; - shd_rn_copy_to_buffer(dst_buffer->handle_, offset_dst, (char*) src + offset_src, size); -} - -void ShadyPlatform::copy_to_host(DeviceId dev_src, const void *src, int64_t offset_src, void *dst, int64_t offset_dst, int64_t size) { - auto& src_device = devices_[dev_src]; - auto& src_buffer = src_device->buffers_[(uint64_t) src]; - shd_rn_copy_from_buffer(src_buffer->handle_, offset_src, (char*) dst + offset_dst, size); -} - -void register_shady_platform(Runtime* runtime) { - runtime->register_platform(); -} \ No newline at end of file diff --git a/src/shady_platform.h b/src/shady_platform.h deleted file mode 100644 index b4271253..00000000 --- a/src/shady_platform.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef ANYDSL_RUNTIME_RUNTIME_SHADY_H -#define ANYDSL_RUNTIME_RUNTIME_SHADY_H - -#include "platform.h" - -namespace shady { -extern "C" { -#include "shady/runner/runner.h" -#include "shady/driver.h" -} -} - -struct ShadyProgram; - -class ShadyPlatform : public Platform { -public: - ShadyPlatform(Runtime* runtime); - ~ShadyPlatform() override; - - void* alloc(DeviceId dev, int64_t size) override; - void* alloc_host(DeviceId dev, int64_t size) override; - void* alloc_unified(DeviceId dev, int64_t size) override; - void* get_device_ptr(DeviceId dev, void* ptr) override; - void release(DeviceId dev, void* ptr) override; - void release_host(DeviceId dev, void* ptr) override; - - void launch_kernel(DeviceId dev, const LaunchParams& launch_params) override; - void synchronize(DeviceId dev) override; - - void copy(DeviceId dev_src, const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) override; - void copy_from_host(const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) override; - void copy_to_host(DeviceId dev_src, const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size) override; - - std::string name() const override { return "shady"; } - size_t dev_count() const override { return 1; } - const char * device_name(DeviceId dev) const override { return "TODO"; } - bool device_check_feature_support(DeviceId dev, const char* feature) const override { return false; } - - struct ShadyDevice; -private: - shady::CompilerConfig compiler_config_ = shady::shd_default_compiler_config(); - shady::Runner* runner_; - - std::vector> devices_; - - friend ShadyDevice; - friend ShadyProgram; -}; - -#endif //ANYDSL_RUNTIME_RUNTIME_SHADY_H From c56eb53fd5b6aa786314090f1c203f2a7a9658ba Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 24 Oct 2025 18:33:06 +0200 Subject: [PATCH 42/43] add more vulkan offloading intrinsics --- platforms/artic/intrinsics_spirv.impala | 5 ++++- platforms/artic/intrinsics_thorin.impala | 10 ++++++++++ platforms/artic/runtime.impala | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/platforms/artic/intrinsics_spirv.impala b/platforms/artic/intrinsics_spirv.impala index 570ff470..1ac28017 100644 --- a/platforms/artic/intrinsics_spirv.impala +++ b/platforms/artic/intrinsics_spirv.impala @@ -1 +1,4 @@ -#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T; \ No newline at end of file +#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T; + +#[import(cc = "device", name = "spirv.builtin")] fn spirv_make_global_variable[T]() -> T; +#[import(cc = "device", name = "spirv.builtin")] fn spirv_decorate_literal[T](T, u32, u32) -> T; diff --git a/platforms/artic/intrinsics_thorin.impala b/platforms/artic/intrinsics_thorin.impala index bc92aa7f..276d8eaa 100644 --- a/platforms/artic/intrinsics_thorin.impala +++ b/platforms/artic/intrinsics_thorin.impala @@ -20,6 +20,16 @@ #[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn levelzero(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn vulkan_cs(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); + +struct VulkanOffloadInfo { + // talks to the runtime to setup this pipeline for execution + setup_offloaded_args: fn() -> (), + filename: &[u8], + num_stages: u32, + stages: &[(u32, &[u8])] +} + +//#[import(cc = "thorin")] fn vulkan_offload(_num_stages: u32, stages: &[(u32, fn() -> ())]) -> VulkanOffloadInfo; #[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T]; #[import(cc = "thorin")] fn hls(_dev: i32, _body: fn() -> ()) -> (); #[import(cc = "thorin", name = "pipeline")] fn thorin_pipeline(_initiation_interval: i32, _lower: i32, _upper: i32, _body: fn(i32) -> ()) -> (); // only for HLS/OpenCL backend diff --git a/platforms/artic/runtime.impala b/platforms/artic/runtime.impala index cedd5b30..013125c5 100644 --- a/platforms/artic/runtime.impala +++ b/platforms/artic/runtime.impala @@ -31,6 +31,8 @@ #[import(cc = "C", name = "anydsl_print_string")] fn print_string(_: &[u8]) -> (); #[import(cc = "C", name = "anydsl_print_flush")] fn print_flush() -> (); +#[import(cc = "C", name = "anydsl_load_offloaded")] fn runtime_load_offloaded(_device: i32, _filename: &[u8], _name: &[u8], _size: &mut u64) -> &[u8]; + // TODO //struct Buffer[T] { // data : &mut [T], From dbad9a42dfde14b0a27e74c71638903c0a38e46a Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 24 Oct 2025 20:41:38 +0200 Subject: [PATCH 43/43] fix some spirv intrinsics --- platforms/artic/intrinsics_spirv.impala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platforms/artic/intrinsics_spirv.impala b/platforms/artic/intrinsics_spirv.impala index 1ac28017..793f2da2 100644 --- a/platforms/artic/intrinsics_spirv.impala +++ b/platforms/artic/intrinsics_spirv.impala @@ -1,4 +1,4 @@ #[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T; -#[import(cc = "device", name = "spirv.builtin")] fn spirv_make_global_variable[T]() -> T; -#[import(cc = "device", name = "spirv.builtin")] fn spirv_decorate_literal[T](T, u32, u32) -> T; +#[import(cc = "device", name = "spirv.global")] fn spirv_make_global_variable[T]() -> T; +#[import(cc = "device", name = "spirv.decorate")] fn spirv_decorate_literal[T](T, u32, u32) -> ();