From 586ce5906256d6c2d7c85518750135d3eed30623 Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Thu, 4 Sep 2025 19:31:33 +0300
Subject: [PATCH 1/4] fix(Vulkan): read external memory usage

---
 llama/addon/globals/addonLog.h    |  1 +
 llama/gpuInfo/vulkan-gpu-info.cpp | 15 +++++++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llama/addon/globals/addonLog.h b/llama/addon/globals/addonLog.h
index 26542880..cc15681e 100644
--- a/llama/addon/globals/addonLog.h
+++ b/llama/addon/globals/addonLog.h
@@ -2,6 +2,7 @@
 #include "llama.h"
 #include "napi.h"
 
+
 struct addon_logger_log {
     public:
         const int logLevelNumber;
diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp
index c1c42316..72beb2b9 100644
--- a/llama/gpuInfo/vulkan-gpu-info.cpp
+++ b/llama/gpuInfo/vulkan-gpu-info.cpp
@@ -42,18 +42,17 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedM
         physicalDevice.getMemoryProperties2(&memProps2);
 
         for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
-            const auto flags = memProps.memoryHeaps[i].flags;
+            const auto heap = memProps2.memoryProperties.memoryHeaps[i];
 
-            if (flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
-                const auto size = memProps.memoryHeaps[i].size;
-                totalMem += size;
-                usedMem += memoryBudgetProperties.heapUsage[i];
+            if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
+                totalMem += heap.size;
+                usedMem += memoryBudgetProperties.heapUsage[i] + (heap.size - memoryBudgetProperties.heapBudget[i]);
 
-                if (flags & vk::MemoryHeapFlagBits::eMultiInstance) {
-                    totalUnifiedMemorySize += size;
+                if (heap.flags & vk::MemoryHeapFlagBits::eMultiInstance) {
+                    totalUnifiedMemorySize += heap.size;
                 }
 
-                if (size > 0 && addDeviceNames) {
+                if (heap.size > 0 && addDeviceNames) {
                     (*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
                 }
 

From afc029490cad3c4da364a1b945e0514b15f2f24f Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Thu, 4 Sep 2025 19:32:28 +0300
Subject: [PATCH 2/4] fix(Electron template): minor performance improvement

---
 templates/electron-typescript-react/electron/state/llmState.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/templates/electron-typescript-react/electron/state/llmState.ts b/templates/electron-typescript-react/electron/state/llmState.ts
index 21558b52..acb8d435 100644
--- a/templates/electron-typescript-react/electron/state/llmState.ts
+++ b/templates/electron-typescript-react/electron/state/llmState.ts
@@ -307,6 +307,7 @@ export const llmFunctions = {
 
             try {
                 await chatSession?.preloadPrompt("", {
+                    functions: modelFunctions, // these won't be called, but are used to avoid redundant context shifts
                     signal: promptAbortController?.signal
                 });
             } catch (err) {

From 825172d68b1b8c14482be45dfca55d1f19e2fb65 Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Thu, 4 Sep 2025 20:04:49 +0300
Subject: [PATCH 3/4] fix: adapt to `llama.cpp` breaking changes

---
 llama/addon/AddonContext.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/llama/addon/AddonContext.cpp b/llama/addon/AddonContext.cpp
index 2d02326e..afda2177 100644
--- a/llama/addon/AddonContext.cpp
+++ b/llama/addon/AddonContext.cpp
@@ -420,7 +420,16 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap
         }
 
         if (options.Has("flashAttention")) {
-            context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
+            if (options.Get("flashAttention").IsBoolean()) {
+                bool value = options.Get("flashAttention").As<Napi::Boolean>().Value();
+                if (value) {
+                    context_params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
+                } else {
+                    context_params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
+                }
+            } else {
+                context_params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
+            }
         }
 
         if (options.Has("threads")) {

From 5f21e4f2388bf100b6e4c3dcaa11d608cd13351f Mon Sep 17 00:00:00 2001
From: "Gilad S." <7817232+giladgd@users.noreply.github.com>
Date: Thu, 4 Sep 2025 20:45:50 +0300
Subject: [PATCH 4/4] build: increase timeout for CUDA 13 installation

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 59ad77be..3dcf2a1b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -133,7 +133,7 @@ jobs:
       - name: Install Cuda 13.0 on Windows (1)
         if: matrix.config.name == 'Windows (1)'
         shell: bash
-        timeout-minutes: 30
+        timeout-minutes: 60
         run: |
           curl -Lo cuda_13.0.0_windows_network.exe https://developer.download.nvidia.com/compute/cuda/13.0.0/network_installers/cuda_13.0.0_windows_network.exe
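
Note (not part of the patch series): a minimal standalone sketch of the VK_EXT_memory_budget accounting that patch 1 switches to. The function name and the bare pNext chaining shown here are illustrative assumptions; only the heapUsage/heapBudget arithmetic mirrors the patched code. The idea is that heapUsage[i] covers allocations made by this process, while (heap.size - heapBudget[i]) approximates memory already consumed externally (other processes and the OS), so summing both gives the heap's effective usage.

// Sketch only: assumes VK_EXT_memory_budget is supported by the physical device.
#include <vulkan/vulkan.hpp>
#include <cstddef>

static void readDeviceLocalMemory(vk::PhysicalDevice physicalDevice, size_t* total, size_t* used) {
    vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties;
    vk::PhysicalDeviceMemoryProperties2 memProps2;
    memProps2.pNext = &memoryBudgetProperties; // chain the budget query onto the base memory query

    physicalDevice.getMemoryProperties2(&memProps2);
    const auto& memProps = memProps2.memoryProperties;

    for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
        const auto heap = memProps.memoryHeaps[i];

        if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
            *total += heap.size;

            // heapUsage[i]: memory this process has allocated from the heap.
            // heap.size - heapBudget[i]: memory the driver reports as unavailable,
            // i.e. already used by other processes or reserved by the OS.
            *used += memoryBudgetProperties.heapUsage[i] + (heap.size - memoryBudgetProperties.heapBudget[i]);
        }
    }
}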