
Commit 2f7d58e

Merge pull request #42 from l3utterfly/master
merge upstream
2 parents: 934eb9b + c81f3bb


61 files changed: +2031 additions, -1246 deletions

.github/workflows/build.yml

Lines changed: 5 additions & 0 deletions
@@ -19,6 +19,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  contents: write # for creating release
+
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   GGML_NLOOP: 3

.github/workflows/close-issue.yml

Lines changed: 5 additions & 0 deletions
@@ -3,6 +3,11 @@ on:
   schedule:
     - cron: "42 0 * * *"
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  issues: write
+
 jobs:
   close-issues:
     runs-on: ubuntu-latest

.github/workflows/nix-ci-aarch64.yml

Lines changed: 7 additions & 0 deletions
@@ -21,6 +21,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-build-aarch64:
     runs-on: ubuntu-latest

.github/workflows/nix-ci.yml

Lines changed: 7 additions & 0 deletions
@@ -12,6 +12,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-eval:
     strategy:

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -63,7 +63,7 @@ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
 option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
 
 # utils
-option(LLAMA_BUILD_COMMON "llama: build common utils library" ON)
+option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
 
 # extra artifacts
 option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
@@ -201,12 +201,12 @@ if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
 endif()
 
-if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)
 endif()
 
-if (LLAMA_BUILD_EXAMPLES)
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
     add_subdirectory(examples)
     add_subdirectory(pocs)
 endif()
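
Since LLAMA_BUILD_COMMON now defaults to ${LLAMA_STANDALONE}, the common utils library, and with it the tests and examples, is only built by default when llama.cpp is the top-level project. A minimal sketch of the two configurations, assuming LLAMA_STANDALONE is ON for a top-level checkout and OFF when the repo is consumed via add_subdirectory:

# top-level checkout: LLAMA_BUILD_COMMON, tests and examples default to ON
cmake -B build
cmake --build build -j

# embedding consumers now skip common/tests/examples unless they opt back in
cmake -B build -DLLAMA_BUILD_COMMON=ON -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF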

CONTRIBUTING.md

Lines changed: 5 additions & 6 deletions
@@ -1,24 +1,23 @@
 # Pull requests (for contributors)
 
 - Test your changes:
-  - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
+  - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the `ggml` library
   - Execute [the full CI locally on your machine](ci/README.md) before publishing
-- Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
-  - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
-- Consider allowing write access to your branch for faster review
+- Optionally rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs
+- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
 
 # Pull requests (for collaborators)
 
 - Squash-merge PRs
 - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
-  - Optionally, pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
+  - Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
 
 # Coding guidelines
 
 - Avoid adding third-party dependencies, extra files, extra headers, etc.
 - Always consider cross-compatibility with other operating systems and architectures
-- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
 - There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
 - Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
 - Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)

README.md

Lines changed: 1 addition & 0 deletions
@@ -169,6 +169,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [AIKit](https://github.com/sozercan/aikit) (MIT)
 - [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
 - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)
+- [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL)
 
 *(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

ci/run.sh

Lines changed: 4 additions & 3 deletions
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
 #
 # sample usage:
 #
@@ -751,7 +751,8 @@ function gg_run_rerank_tiny {
 
     model_f16="${path_models}/ggml-model-f16.gguf"
 
-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s><s>hi\nwhat is panda?</s><s>it's a bear\nwhat is panda?</s><s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    # for this model, the SEP token is "</s>"
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
 
     # sample output
     # rerank score 0: 0.029
@@ -774,7 +775,7 @@
 
     check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
     check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
-    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.15" | tee -a $OUT/${ci}-rk-f16.log
+    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
 
     set +e
 }
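
The separator change matters because `--pooling rank` scores query/document pairs joined by the model's SEP sequence, and for this model that means `</s></s>` between query and document rather than `</s><s>`. A reduced sketch of the same invocation for a single pair (the model path is a placeholder; the separator pattern is specific to this tiny rerank model, as the new comment notes):

query="what is panda?"
document="The giant panda (Ailuropoda melanoleuca) is a bear species endemic to China."

# join query and document with the model's SEP sequence and request rank pooling
./bin/llama-embedding --model ggml-model-f16.gguf \
    -p "${query}</s></s>${document}" \
    --pooling rank --embd-normalize -1 --verbose-prompt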

common/arg.cpp

Lines changed: 16 additions & 2 deletions
@@ -911,7 +911,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
     ).set_sparam());
     add_opt(llama_arg(
         {"-s", "--seed"}, "SEED",
-        format("RNG seed (default: %u, use random seed for %u)", params.sparams.seed, LLAMA_DEFAULT_SEED),
+        format("RNG seed (default: %d, use random seed for %d)", params.sparams.seed, LLAMA_DEFAULT_SEED),
         [](gpt_params & params, const std::string & value) {
             params.sparams.seed = std::stoul(value);
         }
@@ -1838,9 +1838,23 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.endpoint_metrics = true;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_METRICS"));
+    add_opt(llama_arg(
+        {"--slots"},
+        format("enable slots monitoring endpoint (default: %s)", params.endpoint_slots ? "enabled" : "disabled"),
+        [](gpt_params & params) {
+            params.endpoint_slots = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS"));
+    add_opt(llama_arg(
+        {"--props"},
+        format("enable changing global properties via POST /props (default: %s)", params.endpoint_props ? "enabled" : "disabled"),
+        [](gpt_params & params) {
+            params.endpoint_props = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_PROPS"));
     add_opt(llama_arg(
         {"--no-slots"},
-        format("disables slots monitoring endpoint (default: %s)", params.endpoint_slots ? "enabled" : "disabled"),
+        "disables slots monitoring endpoint",
         [](gpt_params & params) {
             params.endpoint_slots = false;
         }
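
Together with the existing `--no-slots`, the server now has explicit opt-ins for both endpoints, and each flag is also reachable through the environment variable registered via `set_env`. A hedged usage sketch (the default port and the GET `/slots` path are assumptions inferred from the option descriptions; only `POST /props` is named explicitly in the help text):

# enable both endpoints on the command line...
./bin/llama-server -m model.gguf --slots --props

# ...or through the registered environment variables
LLAMA_ARG_ENDPOINT_SLOTS=1 LLAMA_ARG_ENDPOINT_PROPS=1 ./bin/llama-server -m model.gguf

# assumed endpoint paths, shown for illustration with an empty JSON body
curl http://localhost:8080/slots
curl -X POST http://localhost:8080/props -H "Content-Type: application/json" -d '{}'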

common/common.cpp

Lines changed: 29 additions & 1 deletion
@@ -838,6 +838,31 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         return iparams;
     }
 
+    if (params.reranking) {
+        bool ok = true;
+
+        if (llama_token_bos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_sep(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (!ok) {
+            llama_free_model(model);
+
+            return iparams;
+        }
+    }
+
     auto cparams = llama_context_params_from_gpt_params(params);
 
     llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -855,6 +880,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     if (cvec.n_embd == -1) {
         llama_free(lctx);
         llama_free_model(model);
+
         return iparams;
     }
 
@@ -867,6 +893,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
+
             return iparams;
         }
     }
@@ -889,7 +916,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         llama_lora_adapters_apply(lctx, iparams.lora_adapters);
     }
 
-    if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
+    if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
         LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
         params.sparams.ignore_eos = false;
     }
@@ -930,6 +957,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
 
     iparams.model = model;
     iparams.context = lctx;
+
     return iparams;
 }
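
With this guard, a model that lacks any of the BOS, EOS, or SEP special tokens now fails fast inside `llama_init_from_gpt_params` when reranking is requested, freeing the model and returning an empty result instead of misbehaving at query time. A hedged sketch of what that looks like from the command line (the `--reranking` flag name is an assumption; the diff only shows the `params.reranking` field it would set):

# requesting reranking against a model without a SEP token now aborts during
# init, after logging one warning per missing special token
./bin/llama-server -m model-without-sep.gguf --reranking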
