
Commit fb45b75

Merge pull request #2 from bandoti/sparkle_master_fix
Merge latest master branch
2 parents: 7c1a685 + e4de404

193 files changed: +26505 −23040 lines

.github/ISSUE_TEMPLATE/010-bug-compilation.yml

Lines changed: 11 additions & 1 deletion
@@ -65,12 +65,22 @@ body:
         If possible, please do a git bisect and identify the exact commit that introduced the bug.
     validations:
       required: false
+  - type: textarea
+    id: command
+    attributes:
+      label: Compile command
+      description: >
+        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: true
   - type: textarea
     id: logs
     attributes:
       label: Relevant log output
       description: >
-        Please copy and paste any relevant log output, including the command that you entered and any generated text.
+        Please copy and paste any relevant log output, including any generated text.
         This will be automatically formatted into code, so no need for backticks.
       render: shell
     validations:

.github/ISSUE_TEMPLATE/019-bug-misc.yml

Lines changed: 11 additions & 1 deletion
@@ -52,6 +52,16 @@ body:
         - Other (Please specify in the next section)
     validations:
       required: false
+  - type: textarea
+    id: command
+    attributes:
+      label: Command line
+      description: >
+        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: false
   - type: textarea
     id: info
     attributes:
@@ -74,7 +84,7 @@ body:
     attributes:
       label: Relevant log output
       description: >
-        If applicable, please copy and paste any relevant log output, including the command that you entered and any generated text.
+        If applicable, please copy and paste any relevant log output, including any generated text.
         This will be automatically formatted into code, so no need for backticks.
       render: shell
     validations:

.github/workflows/build.yml

Lines changed: 14 additions & 16 deletions
@@ -60,8 +60,7 @@ jobs:
              -DLLAMA_CURL=ON \
              -DGGML_METAL_USE_BF16=ON \
              -DGGML_METAL_EMBED_LIBRARY=ON \
-              -DGGML_RPC=ON \
-              -DBUILD_SHARED_LIBS=OFF
+              -DGGML_RPC=ON
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test
@@ -123,8 +122,7 @@ jobs:
              -DLLAMA_FATAL_WARNINGS=ON \
              -DLLAMA_CURL=ON \
              -DGGML_METAL=OFF \
-              -DGGML_RPC=ON \
-              -DBUILD_SHARED_LIBS=OFF
+              -DGGML_RPC=ON
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test
@@ -181,7 +179,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
           cmake --build . --config Release -j $(nproc)

       - name: Test
@@ -651,23 +649,23 @@ jobs:
       matrix:
         include:
           - build: 'noavx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
           - build: 'avx2-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
           - build: 'avx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
           - build: 'avx512-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
           - build: 'openblas-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'kompute-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
           - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
           - build: 'llvm-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'msvc-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'llvm-arm64-opencl-adreno'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'

@@ -914,7 +912,7 @@ jobs:
         shell: cmd
         run: |
           call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
           set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
           cmake --build build --config Release -j %NINJA_JOBS% -t ggml
           cmake --build build --config Release
@@ -1239,7 +1237,7 @@ jobs:

       - name: Create release
         id: create_release
-        uses: anzz1/action-create-release@v1
+        uses: ggml-org/action-create-release@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:

.github/workflows/docker.yml

Lines changed: 1 addition & 2 deletions
@@ -97,10 +97,9 @@ jobs:
       GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
       GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'

-      # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
       - name: Free Disk Space (Ubuntu)
         if: ${{ matrix.config.free_disk_space == true }}
-        uses: jlumbroso/free-disk-space@main
+        uses: ggml-org/free-disk-space@v1.3.1
         with:
           # this might remove tools that are actually needed,
           # if set to "true" but frees about 6 GB

.github/workflows/editorconfig.yml

Lines changed: 3 additions & 1 deletion
@@ -23,5 +23,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: editorconfig-checker/action-editorconfig-checker@main
+      - uses: editorconfig-checker/action-editorconfig-checker@v2
+        with:
+          version: v3.0.3
       - run: editorconfig-checker

CODEOWNERS

Lines changed: 7 additions & 1 deletion
@@ -1,5 +1,11 @@
 # collaborators can optionally add themselves here to indicate their availability for reviewing related PRs

 /ci/ @ggerganov
-/.devops/ @ngxson
+/.devops/*.Dockerfile @ngxson
 /examples/server/ @ngxson
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/gguf.cpp @JohannesGaessler

README.md

Lines changed: 2 additions & 0 deletions
@@ -69,6 +69,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen)
 - [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557)
 - [x] [Phi models](https://huggingface.co/models?search=microsoft/phi)
+- [x] [PhiMoE](https://github.com/ggerganov/llama.cpp/pull/11003)
 - [x] [GPT-2](https://huggingface.co/gpt2)
 - [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118)
 - [x] [InternLM2](https://huggingface.co/models?search=internlm2)
@@ -201,6 +202,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp
 - [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs
 - [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly
+- [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server

 </details>

common/arg.cpp

Lines changed: 15 additions & 6 deletions
@@ -22,6 +22,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
     return *this;
 }

+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
 common_arg & common_arg::set_env(const char * env) {
     help = help + "\n(env: " + env + ")";
     this->env = env;
@@ -37,6 +42,10 @@ bool common_arg::in_example(enum llama_example ex) {
     return examples.find(ex) != examples.end();
 }

+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
 bool common_arg::get_value_from_env(std::string & output) {
     if (env == nullptr) return false;
     char * value = std::getenv(env);
@@ -420,7 +429,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
     auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
             ctx_arg.options.push_back(std::move(arg));
         }
     };
@@ -649,7 +658,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.prompt = value;
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--no-perf"},
         string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
@@ -673,7 +682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.prompt.pop_back();
             }
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",
@@ -700,7 +709,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.prompt = ss.str();
             fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-e", "--escape"},
         string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),
@@ -1512,15 +1521,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & value) {
-            params.lora_adapters.push_back({ std::string(value), 1.0 });
+            params.lora_adapters.push_back({ std::string(value), 1.0, nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
     add_opt(common_arg(
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & fname, const std::string & scale) {
-            params.lora_adapters.push_back({ fname, std::stof(scale) });
+            params.lora_adapters.push_back({ fname, std::stof(scale), nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
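The two `--lora` handlers above now brace-initialize a third member. A minimal sketch of what that implies for the adapter-info struct — the struct and member names below are assumptions for illustration, not taken from this diff; all the diff confirms is a third, pointer-like field initialized to nullptr:

    #include <string>

    // Hypothetical names: the real llama.cpp types may differ.
    struct llama_lora_adapter;  // opaque handle, presumably populated once the adapter is loaded

    struct common_lora_adapter_info {
        std::string path;          // adapter file name (first initializer)
        float       scale;         // user-defined scaling (second initializer)
        llama_lora_adapter * ptr;  // assumed new third member; nullptr at parse time
    };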

common/arg.h

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,7 @@

 struct common_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
     std::vector<const char *> args;
     const char * value_hint = nullptr; // help text or example for arg value
     const char * value_hint_2 = nullptr; // for second arg value
@@ -53,9 +54,11 @@ struct common_arg {
     ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}

     common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
     common_arg & set_env(const char * env);
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
     bool get_value_from_env(std::string & output);
     bool has_value_from_env();
     std::string to_string();
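Taken together, the new `excludes` set, `set_excludes()`, `is_exclude()`, and the tightened `add_opt` predicate let an option registered under `LLAMA_EXAMPLE_COMMON` still opt out of a specific example; this is presumably why the prompt-related options above gain `.set_excludes({LLAMA_EXAMPLE_SERVER})`. A self-contained sketch of the filter (placeholder enum values and option names, not the actual llama.cpp sources):

    #include <cstdio>
    #include <initializer_list>
    #include <set>
    #include <vector>

    enum llama_example { LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER };

    struct common_arg {
        std::set<llama_example> examples = {LLAMA_EXAMPLE_COMMON};
        std::set<llama_example> excludes = {};
        const char * name;

        explicit common_arg(const char * n) : name(n) {}

        common_arg & set_excludes(std::initializer_list<llama_example> ex) {
            excludes = ex;
            return *this;
        }
        bool in_example(llama_example ex) { return examples.count(ex) > 0; }
        bool is_exclude(llama_example ex) { return excludes.count(ex) > 0; }
    };

    int main() {
        const llama_example ex = LLAMA_EXAMPLE_SERVER; // pretend we are building llama-server's option list

        std::vector<common_arg> options;
        auto add_opt = [&](common_arg arg) {
            // same predicate as the patched add_opt in common/arg.cpp
            if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
                options.push_back(std::move(arg));
            }
        };

        add_opt(common_arg("--no-perf"));                                     // kept: COMMON, no exclude
        add_opt(common_arg("--prompt").set_excludes({LLAMA_EXAMPLE_SERVER})); // dropped for the server

        for (const auto & o : options) {
            std::printf("%s\n", o.name); // prints only --no-perf
        }
        return 0;
    }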
