
Commit fc45ad5

Merge branch 'master' into dev-refactoring
2 parents: c518705 + 9220426

31 files changed, +2240 −411 lines

.clang-format

Lines changed: 8 additions & 5 deletions
```diff
@@ -22,8 +22,8 @@ AllowShortIfStatementsOnASingleLine: Never
 AllowShortLambdasOnASingleLine: Inline
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakBeforeMultilineStrings: true
-BinPackArguments: true
-BinPackParameters: true # OnePerLine
+BinPackArguments: false
+BinPackParameters: false # OnePerLine
 BitFieldColonSpacing: Both
 BreakBeforeBraces: Custom # Attach
 BraceWrapping:
@@ -70,15 +70,18 @@ ExperimentalAutoDetectBinPacking: false
 FixNamespaceComments: true
 IncludeBlocks: Regroup
 IncludeCategories:
-  - Regex: '^<.*\.h>'
+  - Regex: '".*"'
    Priority: 1
    SortPriority: 0
-  - Regex: '^<.*'
+  - Regex: '^<.*\.h>'
    Priority: 2
    SortPriority: 0
-  - Regex: '.*'
+  - Regex: '^<.*'
    Priority: 3
    SortPriority: 0
+  - Regex: '.*'
+    Priority: 4
+    SortPriority: 0
 IncludeIsMainRegex: '([-_](test|unittest))?$'
 IncludeIsMainSourceRegex: ''
 IndentAccessModifiers: false
```
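
The reordered `IncludeCategories` put quoted includes first, then angle-bracket headers ending in `.h`, then the remaining angle-bracket includes, with a new catch-all fourth category for anything else. A hypothetical sketch of how an include block would be regrouped under these rules (the file names are invented, not from this commit):

```cpp
// Invented example: with IncludeBlocks: Regroup, clang-format sorts each
// include into its category and separates the categories with blank lines.
#include "llama.h"     // priority 1: "..." includes (project headers) first
#include "common.h"

#include <stdint.h>    // priority 2: <...> includes ending in .h

#include <string>      // priority 3: remaining <...> includes
#include <vector>
```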

CODEOWNERS

Lines changed: 1 addition & 0 deletions
```diff
@@ -9,3 +9,4 @@
 /ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
 /ggml/src/ggml-opt.cpp @JohannesGaessler
 /ggml/src/gguf.cpp @JohannesGaessler
+/ggml/src/ggml-vulkan/ @0cc4m
```

README.md

Lines changed: 2 additions & 4 deletions
```diff
@@ -270,7 +270,6 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
 | [WebGPU [In Progress]](docs/build.md#webgpu) | All |
-
 | [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |

 ## Obtaining and quantizing models
@@ -436,7 +435,7 @@ To learn more about model quantization, [read this documentation](tools/quantize

 ## [`llama-perplexity`](tools/perplexity)

-#### A tool for measuring the perplexity [^1][^2] (and other quality metrics) of a model over a given text.
+#### A tool for measuring the [perplexity](tools/perplexity/README.md) [^1] (and other quality metrics) of a model over a given text.

 - <details open>
   <summary>Measure the perplexity over a text file</summary>
@@ -459,8 +458,7 @@ To learn more about model quantization, [read this documentation](tools/quantize

   </details>

-[^1]: [tools/perplexity/README.md](./tools/perplexity/README.md)
-[^2]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity)
+[^1]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity)

 ## [`llama-bench`](tools/llama-bench)
```
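
For reference, the perplexity the surviving footnote links to is the standard definition: the exponentiated average negative log-likelihood of the tokenized text under the model,

$$\mathrm{PPL}(x_1, \dots, x_N) = \exp\left( -\frac{1}{N} \sum_{i=1}^{N} \log p_\theta(x_i \mid x_{<i}) \right)$$

so lower is better, and a model that guesses uniformly over a vocabulary of size $V$ scores a perplexity of exactly $V$.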

common/common.cpp

Lines changed: 9 additions & 0 deletions
```diff
@@ -448,6 +448,15 @@ void string_replace_all(std::string & s, const std::string & search, const std::
 bool string_ends_with(const std::string_view & str, const std::string_view & suffix) {
     return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
 }
+
+bool string_remove_suffix(std::string & str, const std::string_view & suffix) {
+    bool has_suffix = string_ends_with(str, suffix);
+    if (has_suffix) {
+        str = str.substr(0, str.size() - suffix.size());
+    }
+    return has_suffix;
+}
+
 size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop) {
     if (!str.empty() && !stop.empty()) {
         const char text_last_char = str.back();
```

common/common.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -534,6 +534,7 @@ static bool string_starts_with(const std::string & str,

 // While we wait for C++20's std::string::ends_with...
 bool string_ends_with(const std::string_view & str, const std::string_view & suffix);
+bool string_remove_suffix(std::string & str, const std::string_view & suffix);
 size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop);

 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
```
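
A minimal usage sketch for the new helper, assuming only the declarations above; the suffix string is an invented example:

```cpp
#include "common.h"

#include <cassert>
#include <string>

int main() {
    std::string s = "Hello world<|eot|>";       // "<|eot|>" is an invented suffix
    assert(string_ends_with(s, "<|eot|>"));

    // Removes the suffix in place and reports whether it was present.
    assert(string_remove_suffix(s, "<|eot|>"));
    assert(s == "Hello world");

    // No-op when the suffix is absent: the string is left unchanged.
    assert(!string_remove_suffix(s, "<|eot|>"));
    return 0;
}
```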

docs/build.md

Lines changed: 20 additions & 23 deletions
````diff
@@ -305,9 +305,8 @@ On Linux it is possible to use unified memory architecture (UMA) to share main m

 ## Vulkan

-**Windows**
-
-### w64devkit
+### For Windows Users:
+**w64devkit**

 Download and extract [`w64devkit`](https://github.com/skeeto/w64devkit/releases).

@@ -334,7 +333,7 @@ cmake -B build -DGGML_VULKAN=ON
 cmake --build build --config Release
 ```

-### Git Bash MINGW64
+**Git Bash MINGW64**

 Download and install [`Git-SCM`](https://git-scm.com/downloads/win) with the default settings

@@ -357,7 +356,8 @@ Now you can load the model in conversation mode using `Vulkan`
 build/bin/Release/llama-cli -m "[PATH TO MODEL]" -ngl 100 -c 16384 -t 10 -n -2 -cnv
 ```

-### MSYS2
+**MSYS2**
+
 Install [MSYS2](https://www.msys2.org/) and then run the following commands in a UCRT terminal to install dependencies.
 ```sh
 pacman -S git \
@@ -373,9 +373,9 @@ cmake -B build -DGGML_VULKAN=ON
 cmake --build build --config Release
 ```

-**With docker**:
+### For Docker users:

-You don't need to install Vulkan SDK. It will be installed inside the container.
+You don't need to install the Vulkan SDK. It will be installed inside the container.

 ```sh
 # Build the image
@@ -385,32 +385,29 @@ docker build -t llama-cpp-vulkan --target light -f .devops/vulkan.Dockerfile .
 docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-vulkan -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33
 ```

-**Without docker**:
+### For Linux users:

-Firstly, you need to make sure you have installed [Vulkan SDK](https://vulkan.lunarg.com/doc/view/latest/linux/getting_started_ubuntu.html)
+First, follow the official LunarG instructions for the installation and setup of the Vulkan SDK in the [Getting Started with the Linux Tarball Vulkan SDK](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html) guide.

-For example, on Ubuntu 22.04 (jammy), use the command below:
+> [!IMPORTANT]
+> After completing the first step, ensure that you have used the `source` command on the `setup_env.sh` file inside of the Vulkan SDK in your current terminal session. Otherwise, the build won't work. Additionally, if you close out of your terminal, you must perform this step again if you intend to perform a build. However, there are ways to make this persistent. Refer to the Vulkan SDK guide linked in the first step for more information about any of this.

+Second, after verifying that you have followed all of the SDK installation/setup steps, use this command to make sure before proceeding:
 ```bash
-wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add -
-wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
-apt update -y
-apt-get install -y vulkan-sdk
-# To verify the installation, use the command below:
 vulkaninfo
 ```

-Alternatively your package manager might be able to provide the appropriate libraries.
-For example for Ubuntu 22.04 you can install `libvulkan-dev` instead.
-For Fedora 40, you can install `vulkan-devel`, `glslc` and `glslang` packages.
-
-Then, build llama.cpp using the cmake command below:
-
+Then, assuming you have `cd` into your llama.cpp folder and there are no errors with running `vulkaninfo`, you can proceed to build llama.cpp using the CMake commands below:
 ```bash
 cmake -B build -DGGML_VULKAN=1
 cmake --build build --config Release
-# Test the output binary (with "-ngl 33" to offload all layers to GPU)
-./bin/llama-cli -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
+```
+
+Finally, after finishing your build, you should be able to do something like this:
+```bash
+# Test the output binary
+# "-ngl 99" should offload all of the layers to GPU for most (if not all) models.
+./build/bin/llama-cli -m "PATH_TO_MODEL" -p "Hi you how are you" -ngl 99

 # You should see in the output, ggml_vulkan detected your GPU. For example:
 # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
````

ggml/src/ggml-alloc.c

Lines changed: 0 additions & 15 deletions
```diff
@@ -22,21 +22,6 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
     return t->view_src != NULL;
 }

-static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
-    if (a->type != b->type) {
-        return false;
-    }
-    for (int i = 0; i < GGML_MAX_DIMS; i++) {
-        if (a->ne[i] != b->ne[i]) {
-            return false;
-        }
-        if (a->nb[i] != b->nb[i]) {
-            return false;
-        }
-    }
-    return true;
-}
-
 // ops that return true for this function must not use restrict pointers for their backend implementations
 static bool ggml_op_can_inplace(enum ggml_op op) {
     switch (op) {
```

ggml/src/ggml-backend.cpp

Lines changed: 0 additions & 15 deletions
```diff
@@ -352,21 +352,6 @@ ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {

 // backend copy

-static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
-    if (a->type != b->type) {
-        return false;
-    }
-    for (int i = 0; i < GGML_MAX_DIMS; i++) {
-        if (a->ne[i] != b->ne[i]) {
-            return false;
-        }
-        if (a->nb[i] != b->nb[i]) {
-            return false;
-        }
-    }
-    return true;
-}
-
 void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
     GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
```
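
Both files dropped their identical private copies of `ggml_are_same_layout`. Since `ggml_backend_tensor_copy` still asserts on it, the function presumably now lives in a single shared internal header (an assumption; the new location is not part of this diff). For reference, the check the removed duplicates implemented, with comments added:

```cpp
#include "ggml.h"  // struct ggml_tensor, GGML_MAX_DIMS

// Two tensors have the same layout when they agree on element type,
// extents (ne) and byte strides (nb) in every dimension.
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
    if (a->type != b->type) {
        return false;  // different element types
    }
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        if (a->ne[i] != b->ne[i]) {
            return false;  // different number of elements in dimension i
        }
        if (a->nb[i] != b->nb[i]) {
            return false;  // different stride, i.e. different memory layout
        }
    }
    return true;
}
```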

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
```diff
@@ -494,9 +494,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)

         # Fetch KleidiAI sources:
         include(FetchContent)
-        set(KLEIDIAI_COMMIT_TAG "v1.9.0")
+        set(KLEIDIAI_COMMIT_TAG "v1.11.0")
         set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
-        set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017")
+        set(KLEIDIAI_ARCHIVE_MD5 "3fe9e5ab964c375c53839296eb71eaa2")

         if (POLICY CMP0135)
             cmake_policy(SET CMP0135 NEW)
```

0 commit comments