Merged

33 commits
374101f
cmake : enable building llama.cpp using system libggml (#12321)
ckastner Mar 17, 2025
2f21123
vulkan: Adjust coopmat2 tile sizes and selection heuristic (#12258)
jeffbolznv Mar 17, 2025
891c639
vulkan: Pad N dimension of B matrix for coopmat2 perf, to avoid bound…
jeffbolznv Mar 17, 2025
f07690c
vulkan: use fp32 in coopmat2 q4_k dequant function (#12309)
jeffbolznv Mar 17, 2025
cf2270e
vulkan: subgroup size tuning (#12087)
daniandtheweb Mar 17, 2025
484a8ab
vulkan: Add N/2 and N/4 optimized paths in coopmat2 shader (#12312)
jeffbolznv Mar 17, 2025
01e8f21
ggml-vulkan: remove unused find_program(glslc) (#12416)
guusw Mar 17, 2025
b1b132e
cuda : enable CUDA Graph on CUDA Toolkit < 12.x (#12394)
gaugarg-nv Mar 17, 2025
60c9029
docs : bring llama-cli conversation/template docs up-to-date (#12426)
CISC Mar 17, 2025
7dfad38
llama: Add support for RWKV v7 architecture (#12412)
MollySophia Mar 17, 2025
a53f7f7
fixed compilation warnings in ggml-sycl (#12424)
lslusarczyk Mar 18, 2025
fd123cf
Vulkan: Default to 1GB allocations instead of 4GB to avoid fragmentat…
0cc4m Mar 18, 2025
d9a1452
ggml : add SVE support for q6_K_q8_K (#12361)
fj-y-saito Mar 18, 2025
eba92d6
cmake : fix PowerPC build (#12241)
mehendarkarprajwal Mar 18, 2025
810e0af
server : fix warmup draft cache type (#12446)
ggerganov Mar 18, 2025
35cae5b
SYCL: using graphs is configurable by environment variable and compil…
lslusarczyk Mar 18, 2025
8551c44
context : always use non-causal attention for encoder graphs (#12447)
ggerganov Mar 18, 2025
99aa304
llama : add support for EXAONE tied word embeddings (#12451)
ngxson Mar 18, 2025
c6af216
speculative : fix seg fault in certain cases (#12454)
ggerganov Mar 18, 2025
29fff30
llama : support converting Mistral Small text-only (#12450)
ngxson Mar 18, 2025
bb115d2
musa: override warp_size of musa device to 32 (#12445)
yeahdongcn Mar 18, 2025
75422e8
graph : normalize Q, K, V shapes + sync cross attention (#12449)
ggerganov Mar 18, 2025
d84635b
opencl: improve profiling (#12442)
lhez Mar 18, 2025
c446b2e
vulkan: Submit once enough matmul work has been recorded (#12406)
jeffbolznv Mar 19, 2025
a686171
convert : Support chat_template.json (#12460)
CISC Mar 19, 2025
108e53c
llama : add support for GPT2, Bloom and CodeShell tied word embedding…
CISC Mar 19, 2025
0fd8487
Fix visionOS build and add CI (#12415)
guusw Mar 19, 2025
a9b5928
vulkan: optimize iq1 coopmat2 dequant functions (#12427)
jeffbolznv Mar 19, 2025
517b5dd
CUDA: Improve flash decoding kernel GPU occupancy for BS=1 case (#12183)
gaugarg-nv Mar 19, 2025
568013d
context : clear sets containing encoder output sequence ids before st…
fairydreaming Mar 19, 2025
732b5fb
convert : avoid calls to tokenizer.added_tokens_decoder (#12473)
bartowski1182 Mar 20, 2025
3d82dbc
ggml : block interleaving support for Q4_K quantization for x86 AVX2 …
Srihari-mcw Mar 20, 2025
dbb3a47
llama : make Qwen2MoE QKV bias optional (#12477)
CISC Mar 20, 2025
29 changes: 29 additions & 0 deletions .github/workflows/build.yml
@@ -676,6 +676,35 @@ jobs:
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

macOS-latest-cmake-visionos:
runs-on: macos-latest

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update

- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

macOS-latest-swift:
runs-on: macos-latest

10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -29,6 +29,8 @@ else()
set(LLAMA_STANDALONE OFF)
endif()

option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)

if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)

@@ -145,7 +147,13 @@ endif()
# 3rd-party
#

if (NOT TARGET ggml)
if (LLAMA_USE_SYSTEM_GGML)
message(STATUS "Using system-provided libggml, skipping ggml build")
find_package(ggml REQUIRED)
add_library(ggml ALIAS ggml::ggml)
endif()

if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
add_subdirectory(ggml)
# ... otherwise assume ggml is added by a parent CMakeLists.txt
endif()
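
As a usage sketch for packagers, configuring against a system-provided ggml might look like the following; the install prefix is an assumption for illustration, not something this change mandates:

```bash
# Configure llama.cpp against an already-installed libggml instead of the
# bundled subproject. find_package(ggml REQUIRED) must be able to locate
# ggml's CMake package config under CMAKE_PREFIX_PATH.
cmake -B build \
    -DLLAMA_USE_SYSTEM_GGML=ON \
    -DCMAKE_PREFIX_PATH=/usr/local   # assumption: where libggml is installed
cmake --build build --config Release
```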
8 changes: 4 additions & 4 deletions build-xcframework.sh
@@ -432,8 +432,8 @@ cmake -B build-visionos -G Xcode \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xros \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-visionos --config Release -- -quiet

@@ -445,8 +445,8 @@ cmake -B build-visionos-sim -G Xcode \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xrsimulator \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-visionos-sim --config Release -- -quiet

2 changes: 2 additions & 0 deletions cmake/common.cmake
@@ -1,3 +1,5 @@
include("ggml/cmake/common.cmake")

function(llama_add_compile_flags)
if (LLAMA_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
291 changes: 236 additions & 55 deletions convert_hf_to_gguf.py

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion docs/backend/SYCL.md
@@ -660,8 +660,9 @@ use 1 SYCL GPUs: [0] with Max compute units:512
|--------------------|---------------------------------------|---------------------------------------------|
| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized models |
| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. |
| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
| GGML_SYCL_GRAPH | ON *(default)* \|OFF *(Optional)* | Enable build with [SYCL Graph extension](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc). |
| CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
| CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |

@@ -671,6 +672,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
|-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------|
| GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG |
| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase |
| GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through the SYCL Graph feature. Graphs are disabled by default because their performance is not yet better than the non-graph path. |
| ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer |
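
A hedged sketch of how the new build option and runtime variable combine; the binary and model paths are placeholders, and the compiler settings match the build table above:

```bash
# Compile the SYCL backend with graph support (GGML_SYCL_GRAPH is ON by default)
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_GRAPH=ON \
    -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build build --config Release

# GGML_SYCL_DISABLE_GRAPH defaults to 1 (graphs off); set it to 0 to run
# computations through SYCL Graphs
GGML_SYCL_DISABLE_GRAPH=0 ./build/bin/llama-cli -m models/model.gguf -p "Hello"
```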


41 changes: 36 additions & 5 deletions examples/main/README.md
@@ -27,12 +27,24 @@ Once downloaded, place your model in the models folder in llama.cpp.
##### Input prompt (One-and-done)

```bash
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --prompt "Once upon a time"
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf -no-cnv --prompt "Once upon a time"
```
##### Conversation mode (Allow for continuous interaction with the model)

```bash
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf -cnv --chat-template gemma
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --chat-template gemma
```

##### Conversation mode using built-in jinja chat template

```bash
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja
```

##### One-and-done query using jinja with custom system prompt and a starting prompt

```bash
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja --single-turn -sys "You are a helpful assistant" -p "Hello"
```

##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
@@ -44,12 +56,24 @@ Once downloaded, place your model in the models folder in llama.cpp.

##### Input prompt (One-and-done)
```powershell
./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --prompt "Once upon a time"
./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf -no-cnv --prompt "Once upon a time"
```
##### Conversation mode (Allow for continuous interaction with the model)

```powershell
./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf -cnv --chat-template gemma
./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --chat-template gemma
```

##### Conversation mode using built-in jinja chat template

```powershell
./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --jinja
```

##### One-and-done query using jinja with custom system prompt and a starting prompt

```powershell
./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --jinja --single-turn -sys "You are a helpful assistant" -p "Hello"
```

#### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
@@ -77,6 +101,8 @@ The `llama-cli` program provides several ways to interact with the LLaMA models

- `--prompt PROMPT`: Provide a prompt directly as a command-line option.
- `--file FNAME`: Provide a file containing a prompt or multiple prompts.
- `--system-prompt PROMPT`: Provide a system prompt (otherwise the default one from the chat template is used, if the template provides one).
- `--system-prompt-file FNAME`: Provide a file containing a system prompt.
- `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.)

## Interaction
@@ -89,7 +115,10 @@ In interactive mode, users can participate in text generation by injecting their

- `-i, --interactive`: Run the program in interactive mode, allowing users to engage in real-time conversations or provide specific instructions to the model.
- `--interactive-first`: Run the program in interactive mode and immediately wait for user input before starting the text generation.
- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: false)
- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default or provided chat template) (default: true if chat template found)
- `-no-cnv`: Disable conversation mode (default: false)
- `-st, --single-turn`: Only process a single conversation turn (user input) and then exit.
- `--jinja`: Enable the jinja chat template parser; it will use the model's built-in template or a user-provided one (default: false)
- `--color`: Enable colorized output to visually distinguish between prompts, user input, and generated text.

By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs.
@@ -125,6 +154,8 @@ When --in-prefix or --in-suffix options are enabled the chat template ( --chat-t

Example usage: `--chat-template gemma`

`--chat-template-file FNAME`: Load a custom jinja chat template from an external file. This is useful if the model contains an outdated or incompatible template; some examples can be found in `models/templates`. Up-to-date chat templates can be downloaded from Hugging Face using `scripts/get_chat_template.py`.
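
For example (the template filename below is illustrative):

```bash
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja --chat-template-file models/templates/my-template.jinja
```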

## Context Management

During text generation, LLaMA models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations.
8 changes: 4 additions & 4 deletions examples/server/server.cpp
@@ -1872,6 +1872,10 @@ struct server_context {
params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers;
params_dft.n_parallel = 1;

// force F16 KV cache for the draft model for extra performance
params_dft.cache_type_k = GGML_TYPE_F16;
params_dft.cache_type_v = GGML_TYPE_F16;

llama_init_dft = common_init_from_params(params_dft);

model_dft = llama_init_dft.model.get();
@@ -1892,10 +1896,6 @@ struct server_context {
cparams_dft = common_context_params_to_llama(params_dft);
cparams_dft.n_batch = n_ctx_dft;

// force F16 KV cache for the draft model for extra performance
cparams_dft.type_k = GGML_TYPE_F16;
cparams_dft.type_v = GGML_TYPE_F16;

// the context is not needed - we will create one for each slot
llama_init_dft.context.reset();
}
4 changes: 2 additions & 2 deletions examples/speculative/speculative.cpp
@@ -331,11 +331,11 @@ int main(int argc, char ** argv) {
}

active_seqs.erase(s);
for(int i = 0; i < n_seq_dft; i++) {
for (int i = 0; i < n_seq_dft; i++) {
if (i == s) {
continue;
}
if (drafts[i].tokens[i_dft] == drafts[s].tokens[i_dft]) {
if (drafts[i].active && drafts[i].tokens[i_dft] == drafts[s].tokens[i_dft]) {
// synchronize active status for sequences with the same drafted token
drafts[i].active = drafts[i].active && accept;
if (!drafts[i].active) {
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
@@ -186,6 +186,7 @@ option(GGML_OPENMP "ggml: use OpenMP"
option(GGML_RPC "ggml: use RPC" OFF)
option(GGML_SYCL "ggml: use SYCL" OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
"ggml: sycl target device")
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
26 changes: 26 additions & 0 deletions ggml/cmake/common.cmake
@@ -0,0 +1,26 @@
function(ggml_get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")

if (CCID MATCHES "Clang")
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)

if (
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)

if (CCVER VERSION_GREATER_EQUAL 8.1.0)
list(APPEND CXX_FLAGS -Wextra-semi)
endif()
endif()

set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()
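
A minimal sketch of the intended call pattern, mirroring how ggml's own build files consume this helper; the surrounding flag-list names are assumptions:

```cmake
# Query warning flags appropriate for the active C++ compiler, then fold
# them into this project's flag lists via the GF_* outputs set above.
include("ggml/cmake/common.cmake")

ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
list(APPEND C_FLAGS   ${GF_C_FLAGS})
list(APPEND CXX_FLAGS ${GF_CXX_FLAGS})
```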
24 changes: 24 additions & 0 deletions ggml/include/ggml.h
@@ -454,6 +454,7 @@ extern "C" {
GGML_OP_RMS_NORM,
GGML_OP_RMS_NORM_BACK,
GGML_OP_GROUP_NORM,
GGML_OP_L2_NORM,

GGML_OP_MUL_MAT,
GGML_OP_MUL_MAT_ID,
@@ -502,6 +503,7 @@ extern "C" {
GGML_OP_ADD_REL_POS,
GGML_OP_RWKV_WKV6,
GGML_OP_GATED_LINEAR_ATTN,
GGML_OP_RWKV_WKV7,

GGML_OP_UNARY,

@@ -1095,6 +1097,18 @@ extern "C" {
int n_groups,
float eps);

// l2 normalize along rows
// used in rwkv v7
GGML_API struct ggml_tensor * ggml_l2_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
float eps);

GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float eps);

// a - x
// b - dy
GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -1890,6 +1904,16 @@ extern "C" {
struct ggml_tensor * state,
float scale);

GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
struct ggml_context * ctx,
struct ggml_tensor * r,
struct ggml_tensor * w,
struct ggml_tensor * k,
struct ggml_tensor * v,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * state);

// custom operators

typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
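To make the new `ggml_l2_norm` entry point concrete, here is a minimal sketch of building a graph around it; the context size, tensor shape, and eps value are illustrative assumptions, not values mandated by the header:

```c
#include "ggml.h"

int main(void) {
    // small scratch context for this sketch; 16 MiB is an arbitrary choice
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // 4 rows of 8 floats; ggml_l2_norm normalizes each row to unit L2 length
    struct ggml_tensor * a   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    struct ggml_tensor * out = ggml_l2_norm(ctx, a, 1e-6f);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    // ... fill `a` with data, then execute the graph on a backend of your choice

    ggml_free(ctx);
    return 0;
}
```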
32 changes: 5 additions & 27 deletions ggml/src/CMakeLists.txt
@@ -1,4 +1,5 @@
include(CheckCXXCompilerFlag)
include("../cmake/common.cmake")

add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})

@@ -24,33 +25,6 @@ if (NOT MSVC)
endif()
endif()

function(ggml_get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")

if (CCID MATCHES "Clang")
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)

if (
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)

if (CCVER VERSION_GREATER_EQUAL 8.1.0)
list(APPEND CXX_FLAGS -Wextra-semi)
endif()
endif()

set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()

if (GGML_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
list(APPEND C_FLAGS -Werror)
@@ -351,6 +325,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
target_link_libraries(ggml-base PRIVATE dl)
endif()

if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
endif()

if (BUILD_SHARED_LIBS)
foreach (target ggml-base ggml)
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
22 changes: 15 additions & 7 deletions ggml/src/ggml-cpu/CMakeLists.txt
@@ -287,17 +287,25 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
message(STATUS "PowerPC detected")
execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
if (${POWER_M} MATCHES "POWER10")
list(APPEND ARCH_FLAGS -mcpu=power10)
elseif (${POWER_M} MATCHES "POWER9")
list(APPEND ARCH_FLAGS -mcpu=power9)
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
file(READ "/proc/cpuinfo" POWER10_M)
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
endif()

string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")

if (EXTRACTED_NUMBER GREATER_EQUAL 10)
list(APPEND ARCH_FLAGS -mcpu=power10 -mpowerpc64)
elseif (EXTRACTED_NUMBER EQUAL 9)
list(APPEND ARCH_FLAGS -mcpu=power9 -mpowerpc64)
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
else()
list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
message(STATUS "loongarch64 detected")