Skip to content

Commit 3b195db

Browse files
committed
feat: use RAM and swap sizes in memory usage estimations
1 parent 16c433d commit 3b195db

21 files changed

+2135
-700
lines changed

llama/addon/addon.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "globals/addonLog.h"
99
#include "globals/addonProgress.h"
1010
#include "globals/getGpuInfo.h"
11+
#include "globals/getSwapInfo.h"
1112

1213
bool backendInitialized = false;
1314
bool backendDisposed = false;
@@ -203,6 +204,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
203204
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
204205
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
205206
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
207+
Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
206208
Napi::PropertyDescriptor::Function("init", addonInit),
207209
Napi::PropertyDescriptor::Function("dispose", addonDispose),
208210
});

llama/addon/globals/getGpuInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ void logVulkanWarning(const char* message) {
2626
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
2727
uint64_t total = 0;
2828
uint64_t used = 0;
29+
uint64_t unifiedVramSize = 0;
2930

3031
#ifdef GPU_INFO_USE_CUDA
3132
size_t cudaDeviceTotal = 0;
@@ -41,26 +42,31 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
4142
#ifdef GPU_INFO_USE_VULKAN
4243
uint64_t vulkanDeviceTotal = 0;
4344
uint64_t vulkanDeviceUsed = 0;
44-
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
45+
uint64_t vulkanDeviceUnifiedVramSize = 0;
46+
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);
4547

4648
if (vulkanDeviceSupportsMemoryBudgetExtension) {
4749
total += vulkanDeviceTotal;
4850
used += vulkanDeviceUsed;
51+
unifiedVramSize += vulkanDeviceUnifiedVramSize;
4952
}
5053
#endif
5154

5255
#ifdef GPU_INFO_USE_METAL
5356
uint64_t metalDeviceTotal = 0;
5457
uint64_t metalDeviceUsed = 0;
55-
getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
58+
uint64_t metalDeviceUnifiedVramSize = 0;
59+
getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed, &metalDeviceUnifiedVramSize);
5660

5761
total += metalDeviceTotal;
5862
used += metalDeviceUsed;
63+
unifiedVramSize += metalDeviceUnifiedVramSize;
5964
#endif
6065

6166
Napi::Object result = Napi::Object::New(info.Env());
6267
result.Set("total", Napi::Number::From(info.Env(), total));
6368
result.Set("used", Napi::Number::From(info.Env(), used));
69+
result.Set("unifiedSize", Napi::Number::From(info.Env(), unifiedVramSize));
6470

6571
return result;
6672
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#include "getSwapInfo.h"
2+
#include "addonLog.h"
3+
4+
#ifdef __APPLE__
5+
#include <iostream>
6+
#include <mach/mach.h>
7+
#include <sys/sysctl.h>
8+
#elif __linux__
9+
#include <iostream>
10+
#include <sys/sysinfo.h>
11+
#elif _WIN32
12+
#include <iostream>
13+
#include <windows.h>
14+
#include <psapi.h>
15+
#endif
16+
17+
18+
/**
 * N-API binding that reports the operating system's swap / pagefile figures.
 *
 * Returns a JS object with three numeric properties:
 *   - `total`:   total swap size reported by the OS
 *   - `free`:    swap currently available
 *   - `maxSize`: the swap size limit, or -1 when no limit was read
 *                (the macOS branch does not query one; presumably because
 *                swap is sized dynamically there - confirm with the JS caller)
 *
 * If the platform query fails, an error is logged through
 * addonLlamaCppLogCallback and the affected values stay 0.
 * On platforms other than macOS, Linux and Windows every value is 0.
 */
Napi::Value getSwapInfo(const Napi::CallbackInfo& info) {
    uint64_t totalSwap = 0;
    uint64_t freeSwap = 0;
    uint64_t maxSize = 0;
    bool maxSizeSet = true;

#if defined(__APPLE__)
    struct xsw_usage swapInfo;
    size_t size = sizeof(swapInfo);

    if (sysctlbyname("vm.swapusage", &swapInfo, &size, nullptr, 0) == 0) {
        totalSwap = swapInfo.xsu_total;
        freeSwap = swapInfo.xsu_avail;
        // No maximum is queried on macOS; report the -1 sentinel instead.
        maxSizeSet = false;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get swap info", nullptr);
    }
#elif defined(__linux__)
    struct sysinfo sysInfo;

    if (sysinfo(&sysInfo) == 0) {
        // sysinfo() expresses its memory fields in units of `mem_unit` bytes,
        // so scale them to get byte counts. Guard against a zero unit.
        const uint64_t memUnit = sysInfo.mem_unit > 0 ? sysInfo.mem_unit : 1;

        totalSwap = static_cast<uint64_t>(sysInfo.totalswap) * memUnit;
        freeSwap = static_cast<uint64_t>(sysInfo.freeswap) * memUnit;
        maxSize = totalSwap;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get swap info", nullptr);
    }
#elif defined(_WIN32)
    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);

    if (GlobalMemoryStatusEx(&memInfo)) {
        PERFORMANCE_INFORMATION perfInfo;
        perfInfo.cb = sizeof(PERFORMANCE_INFORMATION);

        if (GetPerformanceInfo(&perfInfo, sizeof(perfInfo))) {
            totalSwap = memInfo.ullTotalPageFile;
            freeSwap = memInfo.ullAvailPageFile;
            // Widen before multiplying: SIZE_T is 32 bits wide on 32-bit
            // Windows and the page-count * page-size product can overflow it.
            maxSize = static_cast<uint64_t>(perfInfo.CommitLimit) * static_cast<uint64_t>(perfInfo.PageSize);
        } else {
            addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get max pagefile size", nullptr);
        }
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get pagefile info", nullptr);
    }
#endif

    // JS numbers are doubles. Convert explicitly, and keep the -1 sentinel as a
    // double: in `cond ? uint64_t : -1` the -1 would be converted to UINT64_MAX
    // and reach JavaScript as ~1.8e19 instead of -1.
    const double maxSizeValue = maxSizeSet ? static_cast<double>(maxSize) : -1.0;

    const Napi::Env env = info.Env();
    Napi::Object obj = Napi::Object::New(env);
    obj.Set("total", Napi::Number::New(env, static_cast<double>(totalSwap)));
    obj.Set("free", Napi::Number::New(env, static_cast<double>(freeSwap)));
    obj.Set("maxSize", Napi::Number::New(env, maxSizeValue));
    return obj;
}

llama/addon/globals/getSwapInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#pragma once
2+
#include "napi.h"
3+
4+
Napi::Value getSwapInfo(const Napi::CallbackInfo& info);

llama/gpuInfo/metal-gpu-info.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@
44
#include <string>
55
#include <vector>
66

7-
void getMetalGpuInfo(uint64_t * total, uint64_t * used);
7+
void getMetalGpuInfo(uint64_t * total, uint64_t * used, uint64_t * unifiedMemorySize);
88
void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames);

llama/gpuInfo/metal-gpu-info.mm

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,22 @@
33
#include <string>
44
#import <Metal/Metal.h>
55

6-
void getMetalGpuInfo(uint64_t * total, uint64_t * used) {
6+
void getMetalGpuInfo(uint64_t * total, uint64_t * used, uint64_t * unifiedMemorySize) {
77
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
88

99
if (device) {
1010
*total = device.recommendedMaxWorkingSetSize;
1111
*used = device.currentAllocatedSize;
12+
13+
if (device.hasUnifiedMemory) {
14+
*unifiedMemorySize = device.recommendedMaxWorkingSetSize;
15+
} else {
16+
*unifiedMemorySize = 0;
17+
}
1218
} else {
1319
*total = 0;
1420
*used = 0;
21+
*unifiedMemorySize = 0;
1522
}
1623

1724
[device release];

llama/gpuInfo/vulkan-gpu-info.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
77

8-
static bool enumerateVulkanDevices(size_t* total, size_t* used, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
8+
static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
99
vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
1010
vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
1111
vk::Instance instance = vk::createInstance(createInfo);
@@ -14,6 +14,7 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, bool addDeviceNa
1414

1515
size_t usedMem = 0;
1616
size_t totalMem = 0;
17+
size_t totalUnifiedMemorySize = 0;
1718

1819
for (size_t i = 0; i < physicalDevices.size(); i++) {
1920
vk::PhysicalDevice physicalDevice = physicalDevices[i];
@@ -41,43 +42,49 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, bool addDeviceNa
4142
physicalDevice.getMemoryProperties2(&memProps2);
4243

4344
for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
44-
if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
45+
const auto flags = memProps.memoryHeaps[i].flags;
46+
47+
if (flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
4548
const auto size = memProps.memoryHeaps[i].size;
4649
totalMem += size;
4750
usedMem += memoryBudgetProperties.heapUsage[i];
4851

52+
if (flags & vk::MemoryHeapFlagBits::eMultiInstance) {
53+
totalUnifiedMemorySize += size;
54+
}
55+
4956
if (size > 0 && addDeviceNames) {
5057
(*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
5158
}
52-
53-
break;
5459
}
5560
}
5661
} else {
5762
// VK_EXT_memory_budget extension is not supported, so we cannot determine used memory
5863
warningLogCallback(
5964
(
6065
"Vulkan VK_EXT_memory_budget extension not supported for device \"" +
61-
std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determained for it"
62-
)
63-
.c_str()
66+
std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determined for it"
67+
).c_str()
6468
);
6569
return false;
6670
}
6771
}
6872

6973
*total = totalMem;
7074
*used = usedMem;
75+
*unifiedMemorySize = totalUnifiedMemorySize;
76+
7177
return true;
7278
}
7379

74-
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
75-
return enumerateVulkanDevices(total, used, false, nullptr, warningLogCallback);
80+
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
81+
return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback);
7682
}
7783

7884
bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
7985
size_t vulkanDeviceTotal = 0;
8086
size_t vulkanDeviceUsed = 0;
87+
size_t unifiedMemorySize = 0;
8188

82-
return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, true, deviceNames, warningLogCallback);
89+
return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, &unifiedMemorySize, true, deviceNames, warningLogCallback);
8390
}

llama/gpuInfo/vulkan-gpu-info.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55

66
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
77

8-
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
8+
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
99
bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback);

src/bindings/AddonTypes.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,18 @@ export type BindingModule = {
6363
setLoggerLogLevel(level: number): void,
6464
getGpuVramInfo(): {
6565
total: number,
66-
used: number
66+
used: number,
67+
unifiedSize: number
6768
},
6869
getGpuDeviceInfo(): {
6970
deviceNames: string[]
7071
},
7172
getGpuType(): "cuda" | "vulkan" | "metal" | undefined,
73+
getSwapInfo(): {
74+
total: number,
75+
maxSize: number,
76+
free: number
77+
},
7278
init(): Promise<void>,
7379
dispose(): Promise<void>
7480
};

0 commit comments

Comments
 (0)