Skip to content

Commit a2b2bc3

Browse files
authored
fix: CUDA context creation (#285)
* fix: CUDA context creation
* fix: detokenizer unpredictable text length
1 parent 826334b commit a2b2bc3

File tree

12 files changed

+33
-59
lines changed

12 files changed

+33
-59
lines changed

llama/addon/AddonContext.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -398,16 +398,6 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
398398
};
399399

400400
AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
401-
batchMemorySize = 0;
402-
has_batch = false;
403-
batch_n_tokens = 0;
404-
n_cur = 0;
405-
406-
uint64_t loadedContextMemorySize = 0;
407-
bool contextLoaded = false;
408-
409-
bool disposed = false;
410-
411401
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
412402
model->Ref();
413403

llama/addon/AddonContext.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
99
llama_context_params context_params;
1010
llama_context* ctx;
1111
llama_batch batch;
12-
uint64_t batchMemorySize;
13-
bool has_batch;
14-
int32_t batch_n_tokens;
15-
int n_cur;
12+
uint64_t batchMemorySize = 0;
13+
bool has_batch = false;
14+
int32_t batch_n_tokens = 0;
15+
int n_cur = 0;
1616

17-
uint64_t loadedContextMemorySize;
18-
bool contextLoaded;
17+
uint64_t loadedContextMemorySize = 0;
18+
bool contextLoaded = false;
1919

20-
bool disposed;
20+
bool disposed = false;
2121

2222
AddonContext(const Napi::CallbackInfo& info);
2323
~AddonContext();

llama/addon/AddonGrammar.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
#include "AddonGrammar.h"
33

44
AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
5-
hasAddonExportsRef = false;
6-
75
// Get the model path
86
std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
97
bool should_print_grammar = false;

llama/addon/AddonGrammar.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
99
public:
1010
grammar_parser::parse_state parsed_grammar;
1111
Napi::Reference<Napi::Object> addonExportsRef;
12-
bool hasAddonExportsRef;
12+
bool hasAddonExportsRef = false;
1313

1414
AddonGrammar(const Napi::CallbackInfo& info);
1515
~AddonGrammar();

llama/addon/AddonGrammarEvaluationState.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
#include "AddonGrammar.h"
77

88
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
9-
grammar = nullptr;
10-
119
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[0].As<Napi::Object>());
1210
grammarDef->Ref();
1311

llama/addon/AddonGrammarEvaluationState.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
77
public:
88
AddonGrammar* grammarDef;
9-
llama_grammar* grammar;
9+
llama_grammar* grammar = nullptr;
1010

1111
AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
1212
~AddonGrammarEvaluationState();

llama/addon/AddonModel.cpp

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -224,17 +224,6 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
224224
};
225225

226226
AddonModel::AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
227-
loadedModelSize = 0;
228-
hasAddonExportsRef = false;
229-
modelLoaded = false;
230-
abortModelLoad = false;
231-
model_load_stopped = false;
232-
rawModelLoadPercentage = 0;
233-
modelLoadPercentage = 0;
234-
onLoadProgressEventCallbackSet = false;
235-
hasLoadAbortSignal = false;
236-
disposed = false;
237-
238227
data = new AddonModelData();
239228
model_params = llama_model_default_params();
240229

@@ -456,18 +445,19 @@ Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
456445
? info[1].As<Napi::Boolean>().Value()
457446
: false;
458447

459-
std::vector<char> result(8, 0);
460-
const int n_length = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
448+
std::string result;
449+
result.resize(std::max(result.capacity(), tokens.ElementLength()));
461450

462-
if (n_length < 0) {
463-
result.resize(-n_length);
464-
int check = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
465-
GGML_ASSERT(check == -n_length);
466-
} else {
467-
result.resize(n_length);
451+
int n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
452+
if (n_chars < 0) {
453+
result.resize(-n_chars);
454+
n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
455+
GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
468456
}
469457

470-
return Napi::String::New(info.Env(), result.data(), result.size());
458+
result.resize(n_chars);
459+
460+
return Napi::String::New(info.Env(), result);
471461
}
472462

473463
Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {

llama/addon/AddonModel.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,22 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
99
llama_model_params model_params;
1010
std::vector<llama_model_kv_override> kv_overrides;
1111
llama_model* model;
12-
uint64_t loadedModelSize;
12+
uint64_t loadedModelSize = 0;
1313
Napi::Reference<Napi::Object> addonExportsRef;
14-
bool hasAddonExportsRef;
14+
bool hasAddonExportsRef = false;
1515
AddonModelData* data;
1616

1717
std::string modelPath;
18-
bool modelLoaded;
19-
bool abortModelLoad;
20-
bool model_load_stopped;
21-
float rawModelLoadPercentage;
22-
unsigned modelLoadPercentage;
18+
bool modelLoaded = false;
19+
bool abortModelLoad = false;
20+
bool model_load_stopped = false;
21+
float rawModelLoadPercentage = 0;
22+
unsigned modelLoadPercentage = 0;
2323
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
24-
bool onLoadProgressEventCallbackSet;
25-
bool hasLoadAbortSignal;
24+
bool onLoadProgressEventCallbackSet = false;
25+
bool hasLoadAbortSignal = false;
2626

27-
bool disposed;
27+
bool disposed = false;
2828

2929
AddonModel(const Napi::CallbackInfo& info);
3030
~AddonModel();

llama/addon/AddonModelLora.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
4242
};
4343

4444
AddonModelLora::AddonModelLora(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModelLora>(info) {
45-
usages = 0;
46-
4745
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
4846
loraFilePath = info[1].As<Napi::String>().Utf8Value();
4947
lora_adapter = nullptr;

llama/addon/AddonModelLora.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ class AddonModelLora : public Napi::ObjectWrap<AddonModelLora> {
88
AddonModel* model;
99
llama_lora_adapter * lora_adapter;
1010
std::string loraFilePath;
11-
uint32_t usages;
11+
uint32_t usages = 0;
1212

1313
AddonModelLora(const Napi::CallbackInfo& info);
1414
~AddonModelLora();

0 commit comments

Comments (0)