|
1 | 1 | Fork for hoivb612 |
2 | 2 | https://github.com/hoivb612/llama.cpp |
3 | 3 |
|
4 | | -#include <sys/stat.h> |
5 | | -#include <sys/types.h> |
6 | | -#include <string> |
7 | | -#include <hash> |
| 4 | +=========================================== |
8 | 5 |
|
9 | | -static std::hash<std::string> hasher; |
10 | | -static const char* dir = "./llama_cache"; |
| 6 | +For ARM64: |
| 7 | +cmake --preset arm64-windows-llvm-release -D GGML_LLAMAFILE=OFF -D GGML_OPENMP=OFF -B build.arm |
| 8 | +cd build.arm |
| 9 | +cmake --build . --config RelWithDebInfo --target llama-bench xbapp |
11 | 10 |
|
12 | | -// create the cache dir if it does not exist yet |
13 | | -struct stat info; |
14 | | -if (stat(dir, &info) != 0) { |
15 | | - mkdir(dir, 0777); |
16 | | -} |
| 11 | +bin\llama-bench.exe -m c:\llama.cpp\models\Llama-3.2-3B-Instruct-Q4_0_4_8.gguf -t 8 -p 128 -n 64 |
| 12 | +| model | size | params | backend | threads | test | t/s | |
| 13 | +| ------------------------------ | ---------: | ---------: | ---------- | ------: | ------------: | -------------------: | |
| 14 | +| llama 3B Q4_0_4_8 | 2.08 GiB | 3.61 B | CPU | 8 | pp128 | 306.69 ± 9.23 | |
| 15 | +| llama 3B Q4_0_4_8 | 2.08 GiB | 3.61 B | CPU | 8 | tg64 | 45.39 ± 0.74 | |
| 16 | + |
| 17 | +bin\llama-bench.exe -m c:\llama.cpp\models\Llama-3.2-3B-Instruct-Q2_K-Second.gguf -t 8 -p 128 -n 64 |
| 18 | +| model | size | params | backend | threads | test | t/s | |
| 19 | +| ------------------------------ | ---------: | ---------: | ---------- | ------: | ------------: | -------------------: | |
| 20 | +| llama 3B Q2_K - Medium | 1.56 GiB | 3.61 B | CPU | 8 | pp128 | 71.69 ± 0.43 | |
| 21 | +| llama 3B Q2_K - Medium | 1.56 GiB | 3.61 B | CPU | 8 | tg64 | 46.66 ± 0.47 | |
| 22 | + |
| 23 | +bin\llama-bench.exe -m c:\llama.cpp\models\Phi-3.5-mini-instruct-Q4_0_4_8.gguf -t 8 -p 128 -n 64 |
| 24 | +| model | size | params | backend | threads | test | t/s | |
| 25 | +| ------------------------------ | ---------: | ---------: | ---------- | ------: | ------------: | -------------------: | |
| 26 | +| phi3 3B Q4_0_4_8 | 2.03 GiB | 3.82 B | CPU | 8 | pp128 | 233.87 ± 6.45 | |
| 27 | +| phi3 3B Q4_0_4_8 | 2.03 GiB | 3.82 B | CPU | 8 | tg64 | 40.70 ± 0.47 | |
| 28 | + |
| 29 | +bin\llama-bench.exe -m c:\llama.cpp\models\Phi-3.5-mini-instruct-Q2_K.gguf -t 8 -p 128 -n 64 |
| 30 | +| model | size | params | backend | threads | test | t/s | |
| 31 | +| ------------------------------ | ---------: | ---------: | ---------- | ------: | ------------: | -------------------: | |
| 32 | +| phi3 3B Q2_K - Medium | 1.32 GiB | 3.82 B | CPU | 8 | pp128 | 50.47 ± 5.81 | |
| 33 | +| phi3 3B Q2_K - Medium | 1.32 GiB | 3.82 B | CPU | 8 | tg64 | 34.63 ± 0.20 | |
17 | 34 |
|
18 | | -// default generated file name |
19 | | -std::string pfx_path(dir); |
20 | | -std::string full_file_path = pfx_path + "/" + std::to_string(hasher(pfx)); |
|
0 commit comments