
Commit 86e1bee

fix: ranking empty inputs (#415)
* fix: ranking empty inputs
* fix: adapt to breaking `llama.cpp` changes
1 parent d1b4416 commit 86e1bee

17 files changed: +128 -78 lines

.releaserc.ts

Lines changed: 17 additions & 1 deletion
```diff
@@ -52,7 +52,23 @@ const config: Omit<GlobalConfig, "repositoryUrl" | "tagFormat"> = {
         }],
         ["@semantic-release/release-notes-generator", {
             writerOpts: {
-                footerPartial: newFooterTemplate
+                footerPartial: newFooterTemplate,
+
+                // ensure that the "Features" group comes before the "Bug Fixes" group
+                commitGroupsSort(a: {title: string}, b: {title: string}) {
+                    const order = ["Features", "Bug Fixes"];
+                    const aIndex = order.indexOf(a?.title);
+                    const bIndex = order.indexOf(b?.title);
+
+                    if (aIndex >= 0 && bIndex >= 0)
+                        return aIndex - bIndex;
+                    else if (aIndex >= 0)
+                        return -1;
+                    else if (bIndex >= 0)
+                        return 1;
+
+                    return (a?.title || "").localeCompare(b?.title || "");
+                }
             }
         }],
         ["@semantic-release/exec", {
```

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -27,7 +27,7 @@
 * Up-to-date with the latest `llama.cpp`. Download and compile the latest release with a [single CLI command](https://node-llama-cpp.withcat.ai//guide/building-from-source#downloading-a-release)
 * Enforce a model to generate output in a parseable format, [like JSON](https://node-llama-cpp.withcat.ai/guide/chat-session#json-response), or even force it to [follow a specific JSON schema](https://node-llama-cpp.withcat.ai/guide/chat-session#response-json-schema)
 * [Provide a model with functions it can call on demand](https://node-llama-cpp.withcat.ai/guide/chat-session#function-calling) to retrieve information or perform actions
-* [Embedding support](https://node-llama-cpp.withcat.ai/guide/embedding)
+* [Embedding and reranking support](https://node-llama-cpp.withcat.ai/guide/embedding)
 * [Safe against special token injection attacks](https://node-llama-cpp.withcat.ai/guide/llama-text#input-safety-in-node-llama-cpp)
 * Great developer experience with full TypeScript support, and [complete documentation](https://node-llama-cpp.withcat.ai/guide/)
 * Much more
```

docs/guide/Vulkan.md

Lines changed: 7 additions & 0 deletions
````diff
@@ -59,6 +59,13 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor
 
   :::
 
+* :::details Windows only: enable long paths support
+  Open cmd as Administrator and run this command:
+  ```shell
+  reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f
+  ```
+  :::
+
 ### Building From Source
 When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source.
````

llama/addon/AddonContext.cpp

Lines changed: 17 additions & 15 deletions
```diff
@@ -105,13 +105,13 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
 
         void Execute() {
             try {
-                context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
+                context->ctx = llama_init_from_model(context->model->model, context->context_params);
 
                 context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
             } catch (const std::exception& e) {
                 SetError(e.what());
             } catch(...) {
-                SetError("Unknown error when calling \"llama_new_context_with_model\"");
+                SetError("Unknown error when calling \"llama_init_from_model\"");
             }
         }
         void OnOK() {
@@ -252,7 +252,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
            sampler->rebuildChainIfNeeded();
 
            const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
-           const int n_vocab = llama_n_vocab(ctx->model->model);
+           const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
 
            auto & candidates = sampler->tokenCandidates;
            for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
@@ -525,7 +525,7 @@ Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
     has_batch = true;
     batch_n_tokens = n_tokens;
 
-    uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
+    uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
     if (newBatchMemorySize > batchMemorySize) {
         adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
         batchMemorySize = newBatchMemorySize;
@@ -645,7 +645,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    const int n_embd = llama_n_embd(model->model);
+    const int n_embd = llama_model_n_embd(model->model);
     const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
     const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
     if (embeddings == NULL) {
@@ -716,23 +716,25 @@ Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Nap
     const auto draftCtx = draftContext->ctx;
     const auto currentModel = model->model;
     const auto draftModel = draftContext->model->model;
+    const auto currentVocab = model->vocab;
+    const auto draftVocab = draftContext->model->vocab;
 
-    if (llama_vocab_type(currentModel) != llama_vocab_type(draftModel)) {
+    if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
         Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    if (llama_add_bos_token(currentModel) != llama_add_bos_token(draftModel) ||
-        llama_add_eos_token(currentModel) != llama_add_eos_token(draftModel) ||
-        llama_token_bos(currentModel) != llama_token_bos(draftModel) ||
-        llama_token_eos(currentModel) != llama_token_eos(draftModel)
+    if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
+        llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
+        llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
+        llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
     ) {
         Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    const int currentModelVocabSize = llama_n_vocab(currentModel);
-    const int draftModelVocabSize = llama_n_vocab(draftModel);
+    const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
+    const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
 
     const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
 
@@ -747,8 +749,8 @@ Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Nap
 
     const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
     for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
-        const char * currentTokenText = llama_token_get_text(currentModel, i);
-        const char * draftTokenText = llama_token_get_text(draftModel, i);
+        const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
+        const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
         if (std::strcmp(currentTokenText, draftTokenText) != 0) {
             Napi::Error::New(
                 info.Env(),
@@ -767,7 +769,7 @@ Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
     AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
     float scale = info[1].As<Napi::Number>().FloatValue();
 
-    llama_lora_adapter_set(ctx, lora->lora_adapter, scale);
+    llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
 
     return info.Env().Undefined();
 }
```
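Most of the changes in this file adapt the addon to renamed `llama.cpp` functions: `llama_new_context_with_model` → `llama_init_from_model`, `llama_n_vocab(model)` → `llama_vocab_n_tokens(vocab)`, `llama_n_embd(model)` → `llama_model_n_embd(model)`, and `llama_lora_adapter_set` → `llama_set_adapter_lora`. Below is a minimal, self-contained sketch of the new call shapes, using only functions that appear in this diff plus standard `llama.cpp` setup and teardown; the model path and default parameters are placeholders, not part of this commit.

```cpp
#include "llama.h"
#include <cstdio>

int main() {
    llama_backend_init();

    // placeholder path; in the addon the model path comes in from JavaScript
    llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    // llama_init_from_model replaces llama_new_context_with_model
    llama_context * ctx = llama_init_from_model(model, llama_context_default_params());
    if (ctx == NULL) {
        llama_model_free(model);
        llama_backend_free();
        return 1;
    }

    // size queries now use the llama_model_* / llama_vocab_* prefixes
    const llama_vocab * vocab = llama_model_get_vocab(model);
    std::printf("n_embd=%d n_vocab=%d\n", llama_model_n_embd(model), llama_vocab_n_tokens(vocab));

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}
```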

llama/addon/AddonGrammarEvaluationState.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -22,7 +22,7 @@ AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInf
         grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
         grammarDef->Ref();
 
-        sampler = llama_sampler_init_grammar(model->model, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
+        sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
     }
 }
 AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
```
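The grammar sampler is now created from the model's vocabulary (`model->vocab`) instead of the model itself. A short hedged sketch of the new `llama_sampler_init_grammar` call shape; the GBNF grammar text and root rule name below are illustrative, not taken from this commit.

```cpp
#include "llama.h"

// Sketch: build a grammar-constrained sampler from a loaded model's vocabulary.
// The grammar string and root rule name are placeholders for illustration.
static llama_sampler * makeYesNoGrammarSampler(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);
    const char * grammarCode = "root ::= \"yes\" | \"no\"";

    // the first argument is now the vocab, not the model
    return llama_sampler_init_grammar(vocab, grammarCode, "root");
}
```

The returned sampler can be added to a sampler chain and must eventually be released with `llama_sampler_free`.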

llama/addon/AddonModel.cpp

Lines changed: 26 additions & 30 deletions
```diff
@@ -8,12 +8,12 @@
 #include "AddonModelData.h"
 #include "AddonModelLora.h"
 
-static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
+static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
     if (token < 0 || token == LLAMA_TOKEN_NULL) {
         return Napi::Number::From(info.Env(), -1);
     }
 
-    auto tokenAttributes = llama_token_get_attr(model, token);
+    auto tokenAttributes = llama_vocab_get_attr(vocab, token);
 
     if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
         return Napi::Number::From(info.Env(), -1);
@@ -22,12 +22,12 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
     return Napi::Number::From(info.Env(), token);
 }
 
-static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
+static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
     if (token < 0) {
         return Napi::Number::From(info.Env(), -1);
     }
 
-    auto tokenAttributes = llama_token_get_attr(model, token);
+    auto tokenAttributes = llama_vocab_get_attr(vocab, token);
 
     if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
         return Napi::Number::From(info.Env(), -1);
@@ -93,6 +93,7 @@ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
         void Execute() {
             try {
                 model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
+                model->vocab = llama_model_get_vocab(model->model);
 
                 model->modelLoaded = model->model != nullptr && model->model != NULL;
             } catch (const std::exception& e) {
@@ -190,7 +191,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
 
         void Execute() {
             try {
-                const auto loraAdapter = llama_lora_adapter_init(modelLora->model->model, modelLora->loraFilePath.c_str());
+                const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
 
                 if (loraAdapter == nullptr) {
                     SetError(
@@ -213,7 +214,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
             } catch (const std::exception& e) {
                 SetError(e.what());
             } catch(...) {
-                SetError("Unknown error when calling \"llama_lora_adapter_init\"");
+                SetError("Unknown error when calling \"llama_adapter_lora_init\"");
             }
         }
         void OnOK() {
@@ -426,7 +427,7 @@ Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
     std::string text = info[0].As<Napi::String>().Utf8Value();
     bool specialTokens = info[1].As<Napi::Boolean>().Value();
 
-    std::vector<llama_token> tokens = common_tokenize(model, text, false, specialTokens);
+    std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);
 
     Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
     for (size_t i = 0; i < tokens.size(); ++i) {
@@ -449,10 +450,10 @@ Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
     std::string result;
     result.resize(std::max(result.capacity(), tokens.ElementLength()));
 
-    int n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
+    int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
     if (n_chars < 0) {
         result.resize(-n_chars);
-        n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
+        n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
         GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
     }
 
@@ -467,7 +468,7 @@ Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
+    return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
 }
 
 Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
@@ -476,7 +477,7 @@ Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return Napi::Number::From(info.Env(), llama_n_embd(model));
+    return Napi::Number::From(info.Env(), llama_model_n_embd(model));
 }
 
 Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
@@ -515,68 +516,63 @@ Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    auto token = llama_token_bos(model);
-    if (token == LLAMA_TOKEN_NULL) {
-        token = llama_token_cls(model);
-    }
-
-    return getNapiControlToken(info, model, token);
+    return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
 }
 Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiControlToken(info, model, llama_token_eos(model));
+    return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
 }
 Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info, model, llama_token_nl(model));
+    return getNapiToken(info, vocab, llama_vocab_nl(vocab));
 }
 Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info, model, llama_token_fim_pre(model));
+    return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
 }
 Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info, model, llama_token_fim_mid(model));
+    return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
 }
 Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info, model, llama_token_fim_suf(model));
+    return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
 }
 Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info, model, llama_token_eot(model));
+    return getNapiToken(info, vocab, llama_vocab_eot(vocab));
 }
 Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info, model, llama_token_sep(model));
+    return getNapiToken(info, vocab, llama_vocab_sep(vocab));
 }
 Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -587,7 +583,7 @@ Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
     int token = info[0].As<Napi::Number>().Int32Value();
     std::stringstream ss;
 
-    const char* str = llama_token_get_text(model, token);
+    const char* str = llama_vocab_get_text(vocab, token);
     if (str == nullptr) {
         return info.Env().Undefined();
     }
@@ -608,7 +604,7 @@ Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
     }
 
     int token = info[0].As<Napi::Number>().Int32Value();
-    auto tokenAttributes = llama_token_get_attr(model, token);
+    auto tokenAttributes = llama_vocab_get_attr(vocab, token);
 
     return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
 }
@@ -624,25 +620,25 @@ Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
 
     int token = info[0].As<Napi::Number>().Int32Value();
 
-    return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
+    return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
 }
 Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
     if (disposed) {
         Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
         return info.Env().Undefined();
     }
 
-    auto vocabularyType = llama_vocab_type(model);
+    auto vocabularyType = llama_vocab_type(vocab);
 
     return Napi::Number::From(info.Env(), int32_t(vocabularyType));
 }
 Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
-    const bool addBos = llama_add_bos_token(model);
+    const bool addBos = llama_vocab_get_add_bos(vocab);
 
     return Napi::Boolean::New(info.Env(), addBos);
 }
 Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
-    const bool addEos = llama_add_eos_token(model);
+    const bool addEos = llama_vocab_get_add_eos(vocab);
 
     return Napi::Boolean::New(info.Env(), addEos);
 }
```
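The common thread in this file is that every token-metadata lookup now goes through a `const llama_vocab*` handle, which the addon fetches once with `llama_model_get_vocab` right after loading the model and stores on the wrapper (see `AddonModel.h` below). A minimal sketch of that pattern, using only calls that appear in this diff; error handling is omitted and the model is assumed to be already loaded.

```cpp
#include "llama.h"
#include <cstdio>

// Sketch: query special tokens and vocabulary metadata through the vocab handle
// that is obtained once from the model, as the addon now does on load.
static void printVocabInfo(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    const llama_token bos = llama_vocab_bos(vocab);
    const llama_token eos = llama_vocab_eos(vocab);

    std::printf("tokens=%d bos=%d eos=%d add_bos=%d add_eos=%d\n",
        llama_vocab_n_tokens(vocab), bos, eos,
        (int) llama_vocab_get_add_bos(vocab),
        (int) llama_vocab_get_add_eos(vocab));

    if (eos >= 0) {
        std::printf("eos text: %s, is EOG: %d\n",
            llama_vocab_get_text(vocab, eos),
            (int) llama_vocab_is_eog(vocab, eos));
    }
}
```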

llama/addon/AddonModel.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -9,6 +9,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
         llama_model_params model_params;
         std::vector<llama_model_kv_override> kv_overrides;
         llama_model* model;
+        const llama_vocab* vocab;
         uint64_t loadedModelSize = 0;
         Napi::Reference<Napi::Object> addonExportsRef;
         bool hasAddonExportsRef = false;
```

llama/addon/AddonModelLora.cpp

Lines changed: 2 additions & 2 deletions
```diff
@@ -30,7 +30,7 @@ class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
             } catch (const std::exception& e) {
                 SetError(e.what());
             } catch(...) {
-                SetError("Unknown error when calling \"llama_lora_adapter_free\"");
+                SetError("Unknown error when calling \"llama_adapter_lora_free\"");
             }
         }
         void OnOK() {
@@ -55,7 +55,7 @@ void AddonModelLora::dispose(bool skipErase) {
     if (lora_adapter != nullptr) {
         auto loraAdapterToDispose = lora_adapter;
         lora_adapter = nullptr;
-        llama_lora_adapter_free(loraAdapterToDispose);
+        llama_adapter_lora_free(loraAdapterToDispose);
 
         if (!skipErase && model->data != nullptr) {
             model->data->removeLora(this);
```
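The LoRA adapter API was renamed in the same breaking change: `llama_lora_adapter_init` → `llama_adapter_lora_init`, `llama_lora_adapter_set` → `llama_set_adapter_lora` (see `AddonContext.cpp` above), and `llama_lora_adapter_free` → `llama_adapter_lora_free`. A hedged sketch of the adapter lifecycle under the new names; the adapter path and scale are placeholders, and `llama_clear_adapter_lora` is not part of this diff but is the matching call for detaching adapters from a context.

```cpp
#include "llama.h"

// Sketch: load a LoRA adapter, attach it to a context, then detach and free it.
// "adapter.gguf" and the 1.0f scale are illustrative placeholders.
static bool applyLora(llama_model * model, llama_context * ctx) {
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf");
    if (adapter == nullptr) {
        return false;
    }

    // was llama_lora_adapter_set
    llama_set_adapter_lora(ctx, adapter, 1.0f);

    // ... run inference with the adapter applied ...

    llama_clear_adapter_lora(ctx);      // detach all adapters from the context
    llama_adapter_lora_free(adapter);   // was llama_lora_adapter_free
    return true;
}
```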
