
Commit 8a3856f

vulkan: Add memory detection for Intel GPU using Level Zero Sysman (PR ollama#12654)
Applied PR ollama#12654, which adds Intel GPU memory detection support using the Level Zero Sysman API. Changes:

- Added mem_l0_sysman.cpp for Level Zero Sysman integration
- Updated CMakeLists.txt to include Level Zero support
- Enhanced ggml-vulkan.cpp with improved GPU memory detection
- Added Level Zero runtime libraries to the Docker build
- Created patch file 0032 for the Level Zero integration

This improves Vulkan memory detection accuracy for Intel integrated and discrete GPUs.
1 parent f9422dc commit 8a3856f

File tree

7 files changed: +893 -9 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -24,3 +24,4 @@ logs/crash
 .ccache/
 
 
+temp_pr12654.patch

Dockerfile

Lines changed: 12 additions & 1 deletion
@@ -122,6 +122,11 @@ RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \
     && cmake --build --parallel --preset 'Vulkan' \
     && cmake --install build --component Vulkan --strip --parallel 8
+# Install Intel oneAPI Level Zero runtime
+# https://dgpu-docs.intel.com/driver/installation-lts2.html
+RUN rpm --import https://repositories.intel.com/gpu/intel-graphics.key \
+    && dnf config-manager --add-repo https://repositories.intel.com/gpu/rhel/8.10/lts/2523/unified/intel-gpu-8.10.repo \
+    && dnf install -y level-zero intel-level-zero-gpu intel-gmmlib
 
 
 FROM base AS build
@@ -143,6 +148,12 @@ FROM --platform=linux/amd64 scratch AS amd64
 COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
 COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
 COPY --from=vulkan dist/lib/ollama /lib/ollama/
+# Copy over minimal Intel oneAPI Level Zero runtime libraries to run Level Zero Sysman
+COPY --from=vulkan /usr/lib64/libze_loader.so.1 /lib/ollama/level_zero/
+COPY --from=vulkan /usr/lib64/libze_intel_gpu.so.1 /lib/ollama/level_zero/
+COPY --from=vulkan /usr/lib64/libigdgmm.so.12 /lib/ollama/level_zero/
+COPY --from=vulkan /usr/lib64/libze_tracing_layer.so.1 /lib/ollama/level_zero/
+COPY --from=vulkan /usr/lib64/libze_validation_layer.so.1 /lib/ollama/level_zero/
 
 FROM --platform=linux/arm64 scratch AS arm64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
@@ -192,7 +203,7 @@ RUN apt-get update \
 COPY --from=archive /bin /usr/bin
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 COPY --from=archive /lib/ollama /usr/lib/ollama
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/lib/ollama/level_zero/
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV OLLAMA_HOST=0.0.0.0:11434
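These libraries are not linked into the ollama binary; like the NVML path, the Level Zero loader is expected to be resolved at runtime through the dynamic linker, which is why staging libze_loader.so.1 under /lib/ollama/level_zero/ and extending LD_LIBRARY_PATH is sufficient. A minimal C++ sketch of that lookup pattern, assuming a plain dlopen of the loader (illustrative only, not the actual mem_l0_sysman.cpp code):

// Hypothetical sketch: locate the Level Zero loader through the dynamic
// linker search path instead of linking it at build time.
#include <dlfcn.h>
#include <cstdio>

// Real signature is ze_result_t zesInit(zes_init_flags_t) from
// <level_zero/zes_api.h>; redeclared minimally to stay self-contained.
typedef int (*zesInit_t)(unsigned int flags);

int main() {
    // dlopen searches LD_LIBRARY_PATH, which the image extends with
    // /lib/ollama/level_zero/.
    void *handle = dlopen("libze_loader.so.1", RTLD_LAZY | RTLD_LOCAL);
    if (!handle) {
        fprintf(stderr, "Level Zero loader not found: %s\n", dlerror());
        return 1;  // no Intel driver stack; Vulkan's memory budget is the fallback
    }
    zesInit_t zes_init = (zesInit_t) dlsym(handle, "zesInit");
    if (zes_init && zes_init(0) == 0) {  // 0 == ZE_RESULT_SUCCESS
        printf("Level Zero Sysman initialized\n");
    }
    dlclose(handle);
    return 0;
}

Because the lookup goes through the dynamic linker, the same image degrades gracefully on hosts without an Intel driver stack: dlopen simply fails and the existing memory-budget fallback is used.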

llama/patches/0032-Add-memory-detection-for-Intel-GPU-using-Level-Zero.patch

Lines changed: 478 additions & 0 deletions
Large diffs are not rendered by default.
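The patch body is not rendered above, but a Sysman memory query follows a fixed sequence in the Level Zero API: initialize Sysman, enumerate drivers and devices, match the requesting device by UUID, then sum the state of its memory modules. A minimal sketch of that sequence follows; it links directly against libze_loader and assumes a lowercase-hex UUID encoding, both simplifications, and l0_sysman_get_device_memory is an illustrative name, not the patch's actual code.

// Minimal sketch of a Level Zero Sysman memory query. Assumes a direct
// link against libze_loader; the UUID hex encoding is an assumption.
#include <level_zero/zes_api.h>
#include <cstdio>
#include <string>
#include <vector>

// Render a Level Zero device UUID as lowercase hex for comparison.
static std::string uuid_to_hex(const ze_device_uuid_t &uuid) {
    char buf[2 * ZE_MAX_DEVICE_UUID_SIZE + 1] = {};
    for (int i = 0; i < ZE_MAX_DEVICE_UUID_SIZE; i++) {
        snprintf(buf + 2 * i, 3, "%02x", uuid.id[i]);
    }
    return buf;
}

// Returns 0 on success, filling *free_mem / *total_mem for the device
// whose UUID matches uuid_hex.
int l0_sysman_get_device_memory(const char *uuid_hex, size_t *free_mem, size_t *total_mem) {
    if (zesInit(0) != ZE_RESULT_SUCCESS) return -1;

    uint32_t driver_count = 0;
    zesDriverGet(&driver_count, nullptr);
    std::vector<zes_driver_handle_t> drivers(driver_count);
    zesDriverGet(&driver_count, drivers.data());

    for (auto driver : drivers) {
        uint32_t dev_count = 0;
        zesDeviceGet(driver, &dev_count, nullptr);
        std::vector<zes_device_handle_t> devices(dev_count);
        zesDeviceGet(driver, &dev_count, devices.data());

        for (auto device : devices) {
            zes_device_properties_t props = {};
            props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
            if (zesDeviceGetProperties(device, &props) != ZE_RESULT_SUCCESS) continue;
            if (uuid_to_hex(props.core.uuid) != uuid_hex) continue;

            // Sum the state of every memory module on the matched device.
            uint32_t mem_count = 0;
            zesDeviceEnumMemoryModules(device, &mem_count, nullptr);
            std::vector<zes_mem_handle_t> mems(mem_count);
            zesDeviceEnumMemoryModules(device, &mem_count, mems.data());

            *free_mem = *total_mem = 0;
            for (auto mem : mems) {
                zes_mem_state_t state = {};
                state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
                if (zesMemoryGetState(mem, &state) == ZE_RESULT_SUCCESS) {
                    *free_mem  += state.free;
                    *total_mem += state.size;
                }
            }
            return 0;
        }
    }
    return -1;  // no matching device
}

Sysman can expose more than one memory module per device (for example on multi-tile parts), which is why the sketch accumulates across handles rather than reading a single one.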

ml/backend/ggml/ggml/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -211,6 +211,7 @@ add_library(ggml-base
     ggml-quants.h
     mem_hip.cpp
     mem_nvml.cpp
+    mem_l0_sysman.cpp
     gguf.cpp)
 
 target_include_directories(ggml-base PRIVATE .)

ml/backend/ggml/ggml/src/ggml-impl.h

Lines changed: 3 additions & 0 deletions
@@ -645,6 +645,9 @@ GGML_API void ggml_nvml_release();
 GGML_API int ggml_hip_mgmt_init();
 GGML_API int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total);
 GGML_API void ggml_hip_mgmt_release();
+GGML_API int ggml_l0_sysman_init();
+GGML_API int ggml_l0_sysman_get_device_memory(const char *uuid, size_t *free, size_t *total);
+GGML_API void ggml_l0_sysman_release();
 
 #ifdef __cplusplus
 }
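The new entry points follow the same init/query/release contract as the existing HIP and NVML helpers above them. A hypothetical standalone caller, mirroring the call sequence the Vulkan backend uses in the next file:

// Hypothetical caller of the new API; the call sequence mirrors the
// Vulkan backend's vendor-specific memory reporting path.
#include <cstddef>
#include <cstdio>

#include "ggml-impl.h"

void report_intel_memory(const char *device_uuid) {
    if (ggml_l0_sysman_init() != 0) {
        return;  // Sysman unavailable; caller falls back to Vulkan's memory budget
    }
    size_t free = 0, total = 0;
    if (ggml_l0_sysman_get_device_memory(device_uuid, &free, &total) == 0) {
        printf("Intel GPU free: %zu total: %zu bytes\n", free, total);
    }
    ggml_l0_sysman_release();
}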

ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 26 additions & 8 deletions
@@ -12450,11 +12450,11 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
     vk::PhysicalDeviceProperties2 props2;
     vkdev.getProperties2(&props2);
 
-    if (!ctx->is_integrated_gpu)
-    {
-        // Use vendor specific management libraries for best VRAM reporting if available
-        switch (props2.properties.vendorID) {
-        case VK_VENDOR_ID_AMD:
+    // Use vendor specific management libraries for best VRAM reporting if available
+    switch (props2.properties.vendorID) {
+    case VK_VENDOR_ID_AMD:
+        if (!ctx->is_integrated_gpu)
+        {
             if (ggml_hip_mgmt_init() == 0) {
                 int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
                 if (status == 0) {
@@ -12464,8 +12464,11 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
                 }
                 ggml_hip_mgmt_release();
             }
-            break;
-        case VK_VENDOR_ID_NVIDIA:
+        }
+        break;
+    case VK_VENDOR_ID_NVIDIA:
+        if (!ctx->is_integrated_gpu)
+        {
             if (ggml_nvml_init() == 0) {
                 int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
                 if (status == 0) {
@@ -12475,8 +12478,23 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
                 }
                 ggml_nvml_release();
             }
-            break;
         }
+        break;
+    case VK_VENDOR_ID_INTEL:
+        // L0 sysman can support both iGPU and dGPU on Windows and Linux.
+        // If the driver is old on Windows we will fail to get memory info for iGPU.
+        // For Linux you need to run ollama with `sudo` or run `sudo setcap cap_perfmon=+ep /path/to/ollama_binary`
+        // to apply perfmon privilege to the ollama binary
+        if (ggml_l0_sysman_init() == 0) {
+            int status = ggml_l0_sysman_get_device_memory(ctx->uuid.c_str(), free, total);
+            if (status == 0) {
+                GGML_LOG_DEBUG("%s utilizing Level Zero Sysman memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+                ggml_l0_sysman_release();
+                return;
+            }
+            ggml_l0_sysman_release();
+        }
+        break;
     }
     // else fallback to memory budget if supported
 
