
Commit 8691585

fix: improve CUDA compilation (#66)
1 parent: eb61383

7 files changed: 47 additions, 10 deletions

llama/CMakeLists.txt
Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.12)
 project ("llama-addon")
 
 if (MSVC)
-    add_compile_options(/EHsc)
+    # add_compile_options(/EHsc)
 else()
     add_compile_options(-fexceptions)
 endif()

src/cli/commands/ChatCommand.ts
Lines changed: 12 additions & 4 deletions

@@ -28,6 +28,7 @@ type ChatCommand = {
     temperature: number,
     topK: number,
     topP: number,
+    gpuLayers?: number,
     repeatPenalty: number,
     lastTokensRepeatPenalty: number,
     penalizeRepeatingNewLine: boolean,
@@ -122,6 +123,12 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
             description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`.",
             group: "Optional:"
         })
+        .option("gpuLayers", {
+            alias: "gl",
+            type: "number",
+            description: "number of layers to store in VRAM",
+            group: "Optional:"
+        })
         .option("repeatPenalty", {
             alias: "rp",
             type: "number",
@@ -165,12 +172,12 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
     },
     async handler({
         model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
-        grammar, threads, temperature, topK, topP, repeatPenalty,
+        grammar, threads, temperature, topK, topP, gpuLayers, repeatPenalty,
         lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens
     }) {
         try {
             await RunChat({
-                model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, threads, temperature, topK, topP,
+                model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, threads, temperature, topK, topP, gpuLayers,
                 lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens
             });
         } catch (err) {
@@ -183,7 +190,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
 
 async function RunChat({
     model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg, threads, temperature, topK, topP,
-    lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens
+    gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens
 }: ChatCommand) {
     const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
     const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -192,7 +199,8 @@ async function RunChat({
 
     let initialPrompt = prompt ?? null;
     const model = new LlamaModel({
-        modelPath: path.resolve(process.cwd(), modelArg)
+        modelPath: path.resolve(process.cwd(), modelArg),
+        gpuLayers: gpuLayers != null ? gpuLayers : undefined
     });
     const context = new LlamaContext({
         model,
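
With this change, the gpuLayers option (alias -gl) flows from yargs through the handler into the LlamaModel constructor. A minimal programmatic sketch of the same thing, assuming the import path and model file name (both hypothetical here):

    import path from "path";
    // hypothetical static import; the command above loads this module dynamically:
    import {LlamaModel} from "./llamaEvaluator/LlamaModel.js";

    const model = new LlamaModel({
        modelPath: path.resolve(process.cwd(), "model.bin"), // hypothetical model file
        gpuLayers: 32 // number of layers to store in VRAM; omit to keep the default
    });

On the CLI, the equivalent is passing --gpuLayers 32 (or -gl 32) to the chat command.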

src/cli/commands/ClearCommand.ts
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 import {CommandModule} from "yargs";
-import * as fs from "fs-extra";
+import fs from "fs-extra";
 import chalk from "chalk";
 import {llamaCppDirectory} from "../../config.js";
 import withOra from "../../utils/withOra.js";
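
The same one-line import change recurs in the files below (clearLlamaBuild.ts, clearTempFolder.ts, cmake.ts). fs-extra is a CommonJS package, so under Node's native ESM loader the default import reliably maps to module.exports, while the namespace form can leave the real API nested under .default at runtime. A small illustration of the failure mode (loader- and bundler-dependent, so treat it as a sketch rather than a guaranteed error):

    // reliable: the synthesized default export is fs-extra's module.exports
    import fs from "fs-extra";
    await fs.pathExists("/some/path");

    // fragile: the namespace object of a CommonJS package may expose the
    // callable API only under `.default`, making this throw "not a function":
    // import * as fs from "fs-extra";
    // await fs.pathExists("/some/path");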

src/utils/clearLlamaBuild.ts
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 import path from "path";
-import * as fs from "fs-extra";
+import fs from "fs-extra";
 import {llamaDirectory} from "../config.js";
 import {clearTempFolder} from "./clearTempFolder.js";
 

src/utils/clearTempFolder.ts
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 import process from "process";
-import * as fs from "fs-extra";
+import fs from "fs-extra";
 import {tempDownloadDirectory} from "../config.js";
 
 export async function clearTempFolder() {

src/utils/cmake.ts
Lines changed: 28 additions & 2 deletions

@@ -1,5 +1,5 @@
 import path from "path";
-import * as fs from "fs-extra";
+import fs from "fs-extra";
 import which from "which";
 import chalk from "chalk";
 import {chmodr} from "chmodrp";
@@ -29,10 +29,13 @@ export async function getCmakePath() {
     } catch (err) {}
 
     try {
-        const resolvedPath = await which("cmake", {
+        let resolvedPath = await which("cmake", {
             path: path.join(llamaDirectory, "xpack", "xpacks", ".bin")
         });
 
+        if (resolvedPath.toLowerCase().endsWith(".cmd"))
+            resolvedPath = (await getBinFromWindowCmd(resolvedPath, "cmake.exe")) ?? "";
+
         if (resolvedPath !== "")
             return resolvedPath;
     } catch (err) {}
@@ -89,3 +92,26 @@ async function downloadCmake() {
     await fs.remove(localXpacksCacheDirectory);
     await fixXpackPermissions();
 }
+
+async function getBinFromWindowCmd(cmdFilePath: string, binName: string) {
+    const fileContent: string = await fs.readFile(cmdFilePath, "utf8");
+    const lowercaseFileContent = fileContent.toLowerCase();
+
+    if (!lowercaseFileContent.includes(binName))
+        return null;
+
+    const lastIndexOfBinName = lowercaseFileContent.lastIndexOf(binName);
+    const characterAfterBinName = fileContent[lastIndexOfBinName + binName.length];
+
+    if (characterAfterBinName !== '"' && characterAfterBinName !== "'")
+        return null;
+
+    const startStringCharacter = fileContent.lastIndexOf(characterAfterBinName, lastIndexOfBinName);
+
+    const binPath = fileContent.slice(startStringCharacter + 1, lastIndexOfBinName + binName.length);
+
+    if (!await fs.pathExists(binPath))
+        return null;
+
+    return binPath;
+}
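
On Windows, which can resolve to the .cmd shim that xpack generates rather than the real executable, so getBinFromWindowCmd unwraps the shim by slicing the quoted path out of its contents. A worked example with hypothetical shim content:

    const fileContent = '@ECHO OFF\r\n"C:\\xpack\\cmake\\bin\\cmake.exe" %*\r\n';
    const binName = "cmake.exe";
    // lastIndexOf locates the final, case-insensitive "cmake.exe"; the character
    // right after it is '"', so the function scans backwards for the matching
    // opening quote and slices out the path between the quotes:
    //   -> C:\xpack\cmake\bin\cmake.exe
    // fs.pathExists then rejects the result unless that file actually exists.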

src/utils/compileLLamaCpp.ts
Lines changed: 3 additions & 0 deletions

@@ -30,6 +30,9 @@ export async function compileLlamaCpp({
     else cmakeCustomOptions.push("LLAMA_METAL=OFF");
 
     if (cuda || process.env.LLAMA_CUBLAS === "1") cmakeCustomOptions.push("LLAMA_CUBLAS=1");
+    if (cuda && process.env.CUDA_PATH != null && await fs.pathExists(process.env.CUDA_PATH))
+        cmakeCustomOptions.push("CMAKE_GENERATOR_TOOLSET=" + process.env.CUDA_PATH);
+
     if (process.env.LLAMA_MPI === "1") cmakeCustomOptions.push("LLAMA_MPI=1");
     if (process.env.LLAMA_OPENBLAS === "1") cmakeCustomOptions.push("LLAMA_OPENBLAS=1");
     if (process.env.LLAMA_BLAS_VENDOR != null) cmakeCustomOptions.push("LLAMA_BLAS_VENDOR=" + process.env.LLAMA_BLAS_VENDOR);