Commit f5b56b4

fix(Vulkan): context creation edge cases
1 parent: f849cd9

2 files changed: +9 -5

llama/addon/globals/getGpuInfo.cpp

Lines changed: 8 additions & 4 deletions
@@ -54,9 +54,13 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
             // this means that we counted memory from devices that aren't used by llama.cpp
             vulkanDeviceUnifiedVramSize = 0;
         }
-
+
         unifiedVramSize += vulkanDeviceUnifiedVramSize;
     }
+
+    if (used == 0 && vulkanDeviceUsed != 0) {
+        used = vulkanDeviceUsed;
+    }
 #endif
 
     Napi::Object result = Napi::Object::New(info.Env());
@@ -93,7 +97,7 @@ std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
     for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
         ggml_backend_dev_t device = ggml_backend_dev_get(i);
         const auto deviceName = std::string(ggml_backend_dev_name(device));
-
+
         if (deviceName == "Metal") {
             return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
         } else if (std::string(deviceName).find("Vulkan") == 0) {
@@ -106,7 +110,7 @@ std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
     for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
         ggml_backend_dev_t device = ggml_backend_dev_get(i);
         const auto deviceName = std::string(ggml_backend_dev_name(device));
-
+
         if (deviceName == "CPU") {
             return std::pair<ggml_backend_dev_t, std::string>(device, "cpu");
         }
@@ -119,7 +123,7 @@ Napi::Value getGpuType(const Napi::CallbackInfo& info) {
     const auto gpuDeviceRes = getGpuDevice();
     const auto device = gpuDeviceRes.first;
     const auto deviceType = gpuDeviceRes.second;
-
+
     if (deviceType == "cpu") {
         return Napi::Boolean::New(info.Env(), false);
    } else if (device != nullptr && deviceType != "") {
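
The substantive change in this file is the new fallback at the end of `getGpuVramInfo`: when the aggregated `used` counter comes out as 0 but the Vulkan device itself reported non-zero usage, the Vulkan figure is used instead, so Vulkan-only machines no longer report zero used VRAM. The other three hunks are whitespace-only. A minimal sketch of how this surfaces through the JavaScript API, assuming the public `getLlama()`/`getVramState()` entry points of node-llama-cpp (they are not part of this diff):

```ts
import {getLlama} from "node-llama-cpp";

// Assumption: getLlama({gpu: "vulkan"}) and llama.getVramState() are the
// public surface that exposes the values computed by getGpuVramInfo.
const llama = await getLlama({gpu: "vulkan"});
const vramState = await llama.getVramState();

// Before this fix, `used` could read 0 on Vulkan-only setups even though
// the Vulkan device reported usage; it now falls back to the Vulkan figure.
console.log(`VRAM used: ${vramState.used} of ${vramState.total} bytes`);
```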

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ const defaultLoraScale = 1;
 const shrinkRetriesMinContextSize = 4096;
 const defaultMaxPunishTokens = 64;
 const defaultFailedCreationRemedy = {
-    retries: 6,
+    retries: 16,
     autoContextSizeShrink: 0.16
 } as const satisfies Required<LlamaContextOptions["failedCreationRemedy"]>;
 const defaultEvaluationPriority: EvaluationPriority = 5;
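
This raises the default `failedCreationRemedy` from 6 retries to 16, keeping the 0.16 automatic context-size shrink factor per failed attempt, which gives context creation more room to recover when the initial size is too ambitious (the Vulkan edge cases this commit targets). The default can still be overridden per context; a sketch, assuming the options are passed through `model.createContext()` (only the `retries`/`autoContextSizeShrink` shape is confirmed by the diff above):

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();
// "path/to/model.gguf" is a placeholder, not a file from this repo.
const model = await llama.loadModel({modelPath: "path/to/model.gguf"});

// Override the default remedy: fewer retries, more aggressive shrinking,
// so each failed creation retries with the context size reduced by 25%.
const context = await model.createContext({
    failedCreationRemedy: {
        retries: 4,
        autoContextSizeShrink: 0.25
    }
});
```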
