Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)

# Opt-in switch (default OFF) described as making GGML compatible with
# Vulkan v1.2.162. The GGML_VULKAN_V1_2_162 compile definition is added only
# when this option and GGML_VULKAN are both enabled, so builds without the
# Vulkan backend never see the define.
# NOTE(review): this check reads GGML_VULKAN at this point in the file; confirm
# GGML_VULKAN is already set (e.g. from the command line/cache) when this runs.
option(GGML_VULKAN_V1_2_162 "llama: make GGML compatible with vulkan v1.2.162" OFF)
if (GGML_VULKAN_V1_2_162 AND GGML_VULKAN)
add_definitions(-DGGML_VULKAN_V1_2_162)
endif()

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)

Expand Down
24 changes: 24 additions & 0 deletions ggml/src/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
#define VK_LOG_DEBUG(msg) ((void) 0)
#endif // GGML_VULKAN_DEBUG

// Compatibility shim for GGML_VULKAN_V1_2_162 builds: when the headers included
// above already define VK_NULL_HANDLE, swap that definition for nullptr for the
// remainder of this translation unit. If either macro is absent, nothing changes.
// NOTE(review): presumably the older headers define VK_NULL_HANDLE as a plain
// integer constant that does not convert cleanly to the C++ handle wrappers;
// confirm against the 1.2.162 headers.
#if defined(GGML_VULKAN_V1_2_162) && defined(VK_NULL_HANDLE)
#undef VK_NULL_HANDLE
#define VK_NULL_HANDLE nullptr
#endif // GGML_VULKAN_V1_2_162 && VK_NULL_HANDLE

struct ggml_backend_vk_context;

struct vk_queue {
Expand Down Expand Up @@ -865,6 +872,16 @@ static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) {
tl_signal_vals[idx].push_back(submission.signal_semaphores[i].value);
tl_signal_semaphores[idx].push_back(submission.signal_semaphores[i].s);
}
#if defined(GGML_VULKAN_V1_2_162)
vk::TimelineSemaphoreSubmitInfo timeline_info(
(uint32_t) submission.wait_semaphores.size(),
tl_wait_vals[idx].data(),
(uint32_t) submission.signal_semaphores.size(),
tl_signal_vals[idx].data()
);
timeline_info.setPNext(nullptr);
tl_submit_infos.push_back(timeline_info);
#else
tl_submit_infos.push_back({
(uint32_t) submission.wait_semaphores.size(),
tl_wait_vals[idx].data(),
Expand All @@ -873,6 +890,7 @@ static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) {
});
tl_submit_infos[idx].sType = vk::StructureType::eTimelineSemaphoreSubmitInfo;
tl_submit_infos[idx].pNext = nullptr;
#endif
vk::SubmitInfo si{
(uint32_t) submission.wait_semaphores.size(),
tl_wait_semaphores[idx].data(),
Expand Down Expand Up @@ -1846,22 +1864,28 @@ static vk_device ggml_vk_get_device(size_t idx) {

vk::PhysicalDeviceProperties2 props2;
vk::PhysicalDeviceMaintenance3Properties props3;
#ifndef GGML_VULKAN_V1_2_162
vk::PhysicalDeviceMaintenance4Properties props4;
#endif // GGML_VULKAN_V1_2_162
vk::PhysicalDeviceSubgroupProperties subgroup_props;
props2.pNext = &props3;
props3.pNext = &subgroup_props;
#ifndef GGML_VULKAN_V1_2_162
if (maintenance4_support) {
subgroup_props.pNext = &props4;
}
#endif // GGML_VULKAN_V1_2_162
device->physical_device.getProperties2(&props2);
device->properties = props2.properties;

const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");

if (GGML_VK_FORCE_MAX_ALLOCATION_SIZE != nullptr) {
device->max_memory_allocation_size = std::stoi(GGML_VK_FORCE_MAX_ALLOCATION_SIZE);
#ifndef GGML_VULKAN_V1_2_162
} else if (maintenance4_support) {
device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize);
#endif // GGML_VULKAN_V1_2_162
} else {
device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
}
Expand Down