
Commit 595a6bc

fix: adapt to breaking changes of llama.cpp (#117)
1 parent: ceb538d · commit: 595a6bc

File tree

llama/addon.cpp
src/llamaEvaluator/LlamaContext.ts
src/llamaEvaluator/LlamaModel.ts
src/utils/compileLLamaCpp.ts
src/utils/getBin.ts

5 files changed (+17 -32 lines)

llama/addon.cpp

Lines changed: 0 additions & 4 deletions
@@ -152,10 +152,6 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
             context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Int32Value();
         }
 
-        if (options.Has("f16Kv")) {
-            context_params.f16_kv = options.Get("f16Kv").As<Napi::Boolean>().Value();
-        }
-
         if (options.Has("logitsAll")) {
             context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
         }
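
The addon checks each option with `options.Has(...)` before copying it onto llama.cpp's `llama_context_params`, so with the `f16Kv` branch gone, a caller that still passes the field is silently ignored rather than rejected. A hedged sketch of a guard the JS side could apply before crossing into native code (`omitKeys` is an illustrative helper, not part of node-llama-cpp):

```ts
// Illustrative only: strip options the native addon no longer reads
// before forwarding them across the N-API boundary.
function omitKeys<T extends object, K extends keyof T>(obj: T, keys: readonly K[]): Omit<T, K> {
    const result: Partial<T> = {...obj};
    for (const key of keys)
        delete result[key];
    return result as Omit<T, K>;
}

const contextOptions = {batchSize: 512, f16Kv: true, logitsAll: false};
const forwarded = omitKeys(contextOptions, ["f16Kv"]); // {batchSize: 512, logitsAll: false}
```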

src/llamaEvaluator/LlamaContext.ts

Lines changed: 0 additions & 5 deletions
@@ -26,9 +26,6 @@ export type LlamaContextOptions = {
     /** prompt processing batch size */
     batchSize?: number,
 
-    /** use fp16 for KV cache */
-    f16Kv?: boolean,
-
     /** the llama_eval() call computes all logits, not just the last one */
     logitsAll?: boolean,
 
@@ -85,7 +82,6 @@ export class LlamaContext {
         seed = model._contextOptions.seed,
         contextSize = model._contextOptions.contextSize,
         batchSize = model._contextOptions.batchSize,
-        f16Kv = model._contextOptions.f16Kv,
         logitsAll = model._contextOptions.logitsAll,
         embedding = model._contextOptions.embedding,
         threads = model._contextOptions.threads
@@ -95,7 +91,6 @@
         seed: seed != null ? Math.max(-1, seed) : undefined,
         contextSize,
         batchSize,
-        f16Kv,
         logitsAll,
         embedding,
         threads
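
For downstream code the effect is that `f16Kv` disappears from `LlamaContextOptions` while the surrounding options keep their shape. A minimal usage sketch after the change (the model path is a placeholder):

```ts
import {LlamaModel, LlamaContext} from "node-llama-cpp";

const model = new LlamaModel({modelPath: "path/to/model.gguf"}); // placeholder path

// f16Kv is no longer accepted here; batchSize, logitsAll, etc. are unchanged.
const context = new LlamaContext({
    model,
    batchSize: 512,
    logitsAll: false
});
```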

src/llamaEvaluator/LlamaModel.ts

Lines changed: 1 addition & 11 deletions
@@ -76,13 +76,6 @@ export type LlamaModelOptions = {
      */
     topP?: number,
 
-    /**
-     * use fp16 for KV cache
-     * @deprecated use the `f16Kv` option on `LlamaContext` instead
-     * @hidden
-     */
-    f16Kv?: boolean,
-
     /**
      * the llama_eval() call computes all logits, not just the last one
      * @deprecated use the `logitsAll` option on `LlamaContext` instead
@@ -116,7 +109,6 @@ export class LlamaModel {
         seed: LlamaModelOptions["seed"],
         contextSize: LlamaModelOptions["contextSize"],
         batchSize: LlamaModelOptions["batchSize"],
-        f16Kv: LlamaModelOptions["f16Kv"],
         logitsAll: LlamaModelOptions["logitsAll"],
         embedding: LlamaModelOptions["embedding"],
         threads: LlamaModelOptions["threads"]
@@ -160,7 +152,6 @@ export class LlamaModel {
      * Set to `1` to disable.
      *
      * Only relevant when `temperature` is set to a value greater than `0`.
-     * @param {boolean} [options.f16Kv] - use fp16 for KV cache
      * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
      * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
      * @param {boolean} [options.useMmap] - use mmap if possible
@@ -169,7 +160,7 @@
      */
     public constructor({
         modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers,
-        threads = 6, temperature = 0, topK = 40, topP = 0.95, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding
+        threads = 6, temperature = 0, topK = 40, topP = 0.95, logitsAll, vocabOnly, useMmap, useMlock, embedding
     }: LlamaModelOptions) {
         this._model = new LLAMAModel(path.resolve(process.cwd(), modelPath), removeNullFields({
             gpuLayers,
@@ -182,7 +173,6 @@
             seed,
             contextSize,
             batchSize,
-            f16Kv,
             logitsAll,
             embedding,
             threads
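
The options object assembled here is filtered through `removeNullFields` before reaching the native `LLAMAModel`, so options left unset never cross into C++. A plausible sketch of that helper (the real implementation lives in the repo's utils and may differ):

```ts
// Plausible shape of removeNullFields: drop null/undefined entries so the
// native layer only receives options that were explicitly set.
function removeNullFields<T extends object>(obj: T): Partial<T> {
    const result: Partial<T> = {};
    for (const [key, value] of Object.entries(obj)) {
        if (value != null)
            result[key as keyof T] = value as T[keyof T];
    }
    return result;
}

removeNullFields({seed: null, batchSize: 512}); // → {batchSize: 512}
```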

src/utils/compileLLamaCpp.ts

Lines changed: 16 additions & 11 deletions
@@ -68,19 +68,24 @@ export async function compileLlamaCpp({
         __dirname
     );
 
-    const binFilesDirPath = path.join(llamaDirectory, "build", "llama.cpp", "bin");
+    const binFilesDirPaths = [
+        path.join(llamaDirectory, "build", "bin"),
+        path.join(llamaDirectory, "build", "llama.cpp", "bin")
+    ];
     const compiledResultDirPath = await getCompiledResultDir(true);
 
-    if (await fs.pathExists(binFilesDirPath)) {
-        const files = await fs.readdir(binFilesDirPath);
-
-        await Promise.all(
-            files.map((fileName) => (
-                fs.copy(path.join(binFilesDirPath, fileName), path.join(compiledResultDirPath, fileName), {
-                    overwrite: false
-                })
-            ))
-        );
+    for (const binFilesDirPath of binFilesDirPaths) {
+        if (await fs.pathExists(binFilesDirPath)) {
+            const files = await fs.readdir(binFilesDirPath);
+
+            await Promise.all(
+                files.map((fileName) => (
+                    fs.copy(path.join(binFilesDirPath, fileName), path.join(compiledResultDirPath, fileName), {
+                        overwrite: false
+                    })
+                ))
+            );
+        }
     }
 
     if (setUsedBinFlagArg) {
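
llama.cpp's build evidently moved its binaries, so the copy step now probes the new `build/bin` location before the old `build/llama.cpp/bin` one; because the copies use `overwrite: false`, the first directory that provides a given file wins. A condensed sketch of the same pattern with `fs-extra` (already imported as `fs` in this file; `copyBinsFromCandidates` is our own name):

```ts
import * as path from "path";
import fs from "fs-extra";

// Copy every file from each existing candidate directory into destDir.
// overwrite: false gives earlier directories precedence on name collisions.
async function copyBinsFromCandidates(candidateDirs: string[], destDir: string): Promise<void> {
    for (const dir of candidateDirs) {
        if (!(await fs.pathExists(dir)))
            continue;

        const files = await fs.readdir(dir);
        await Promise.all(
            files.map((fileName) => fs.copy(
                path.join(dir, fileName),
                path.join(destDir, fileName),
                {overwrite: false}
            ))
        );
    }
}
```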

src/utils/getBin.ts

Lines changed: 0 additions & 1 deletion
@@ -117,7 +117,6 @@ export type LLAMAContext = {
     seed?: number,
     contextSize?: number,
     batchSize?: number,
-    f16Kv?: boolean,
     logitsAll?: boolean,
     embedding?: boolean,
     threads?: number,
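
Dropping the field from the `LLAMAContext` binding type means stale TypeScript call sites now fail at compile time instead of being silently ignored at runtime. A small sketch of the effect (`ContextOptions` below is a local illustrative type mirroring the diff, not an exported name):

```ts
// Local mirror of the context options shape after this commit.
type ContextOptions = {
    seed?: number,
    contextSize?: number,
    batchSize?: number,
    logitsAll?: boolean,
    embedding?: boolean,
    threads?: number
};

const ok: ContextOptions = {batchSize: 512, threads: 6};

// @ts-expect-error f16Kv no longer exists on the options type
const stale: ContextOptions = {f16Kv: true};
```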
