@@ -76,13 +76,6 @@ export type LlamaModelOptions = {
76
76
*/
77
77
topP ?: number ,
78
78
79
- /**
80
- * use fp16 for KV cache
81
- * @deprecated use the `f16Kv` option on `LlamaContext` instead
82
- * @hidden
83
- */
84
- f16Kv ?: boolean ,
85
-
86
79
/**
87
80
* the llama_eval() call computes all logits, not just the last one
88
81
* @deprecated use the `logitsAll` option on `LlamaContext` instead
@@ -116,7 +109,6 @@ export class LlamaModel {
116
109
seed : LlamaModelOptions [ "seed" ] ,
117
110
contextSize : LlamaModelOptions [ "contextSize" ] ,
118
111
batchSize : LlamaModelOptions [ "batchSize" ] ,
119
- f16Kv : LlamaModelOptions [ "f16Kv" ] ,
120
112
logitsAll : LlamaModelOptions [ "logitsAll" ] ,
121
113
embedding : LlamaModelOptions [ "embedding" ] ,
122
114
threads : LlamaModelOptions [ "threads" ]
@@ -160,7 +152,6 @@ export class LlamaModel {
160
152
* Set to `1` to disable.
161
153
*
162
154
* Only relevant when `temperature` is set to a value greater than `0`.
163
- * @param {boolean } [options.f16Kv] - use fp16 for KV cache
164
155
* @param {boolean } [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
165
156
* @param {boolean } [options.vocabOnly] - only load the vocabulary, no weights
166
157
* @param {boolean } [options.useMmap] - use mmap if possible
@@ -169,7 +160,7 @@ export class LlamaModel {
169
160
*/
170
161
public constructor ( {
171
162
modelPath, seed = null , contextSize = 1024 * 4 , batchSize, gpuLayers,
172
- threads = 6 , temperature = 0 , topK = 40 , topP = 0.95 , f16Kv , logitsAll, vocabOnly, useMmap, useMlock, embedding
163
+ threads = 6 , temperature = 0 , topK = 40 , topP = 0.95 , logitsAll, vocabOnly, useMmap, useMlock, embedding
173
164
} : LlamaModelOptions ) {
174
165
this . _model = new LLAMAModel ( path . resolve ( process . cwd ( ) , modelPath ) , removeNullFields ( {
175
166
gpuLayers,
@@ -182,7 +173,6 @@ export class LlamaModel {
182
173
seed,
183
174
contextSize,
184
175
batchSize,
185
- f16Kv,
186
176
logitsAll,
187
177
embedding,
188
178
threads
0 commit comments