Merged

19 commits
24db0a7
Add `ggml_roll` (ggml/1274)
Acly Jun 18, 2025
3f3bcea
ggml: Add Apple support for GGML_CPU_ALL_VARIANTS (llama/14258)
chaxu01 Jun 18, 2025
182e699
ggml-cpu: fix uncaught underscore terminators (llama/14023)
taronaeo Jun 18, 2025
b3fce03
ggml-cpu: reduce asm calls for hsum (llama/14037)
taronaeo Jun 18, 2025
a642a95
metal : add mean kernel (llama/14267)
ggerganov Jun 19, 2025
83e5e8c
Vulkan: Set device max size for host memory to avoid OOM warning and …
0cc4m Jun 19, 2025
7c8169a
llamafile : support s390x SIMD instruction set (llama/14273)
taronaeo Jun 19, 2025
3059287
sycl: Cleanup codepaths in Get Rows in sycl backend (llama/14215)
ShanoToni Jun 19, 2025
172d774
build : suppress gcc15 compile warnings (llama/14261)
fanyang89 Jun 19, 2025
9f9df26
ggml-cpu : remove unnecesary arm feature detection (llama/14281)
slaren Jun 19, 2025
800d157
CUDA: add conv_2d_dw (llama/14265)
am17an Jun 20, 2025
32adef8
ggml: Update KleidiAI to v1.9.0 (llama/14277)
chaxu01 Jun 20, 2025
bbfcd43
ggml : fix repack work size for mul_mat_id (llama/14292)
ggerganov Jun 20, 2025
a0c715c
cuda : synchronize graph capture and cublas handle destruction (llama…
slaren Jun 20, 2025
c07464b
Implement GGML_CPU_ALL_VARIANTS for PowerPC (llama/14286)
ckastner Jun 20, 2025
3026343
sycl: add usage of enqueue_functions extension (llama/14244)
s-Nick Jun 20, 2025
c3d50c2
CUDA: add conv_2d_transpose (llama/14287)
am17an Jun 20, 2025
faae28d
sync : ggml
ggerganov Jun 20, 2025
e7d5fae
talk-llama : sync llama.cpp
ggerganov Jun 20, 2025
3 changes: 2 additions & 1 deletion examples/talk-llama/CMakeLists.txt
@@ -18,7 +18,8 @@ if (WHISPER_SDL2)
     llama-io.cpp
     llama-kv-cache-unified.cpp
     llama-kv-cache-unified-iswa.cpp
-    llama-kv-cache-recurrent.cpp
+    llama-memory-recurrent.cpp
+    llama-memory-hybrid.cpp
     llama-memory.cpp
     llama-mmap.cpp
     llama-model-loader.cpp
24 changes: 24 additions & 0 deletions examples/talk-llama/llama-arch.cpp
@@ -147,6 +147,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE,            "%s.attention.scale" },
     { LLM_KV_ATTENTION_KEY_LENGTH_MLA,   "%s.attention.key_length_mla" },
     { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
+    { LLM_KV_ATTENTION_LAYER_INDICES,    "%s.attention.layer_indices" },

     { LLM_KV_ROPE_DIMENSION_COUNT,       "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS,    "%s.rope.dimension_sections" },
@@ -197,6 +198,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_MASK_ID,              "tokenizer.ggml.mask_token_id" },
     { LLM_KV_TOKENIZER_ADD_BOS,              "tokenizer.ggml.add_bos_token" },
     { LLM_KV_TOKENIZER_ADD_EOS,              "tokenizer.ggml.add_eos_token" },
+    { LLM_KV_TOKENIZER_ADD_SEP,              "tokenizer.ggml.add_sep_token" },
     { LLM_KV_TOKENIZER_ADD_PREFIX,           "tokenizer.ggml.add_space_prefix" },
     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,      "tokenizer.ggml.remove_extra_whitespaces" },
     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
@@ -1816,3 +1818,25 @@ llm_arch llm_arch_from_string(const std::string & name) {
 const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
     return LLM_TENSOR_INFOS.at(tensor);
 }
+
+bool llm_arch_is_recurrent(const llm_arch & arch) {
+    switch (arch) {
+        case LLM_ARCH_MAMBA:
+        case LLM_ARCH_RWKV6:
+        case LLM_ARCH_RWKV6QWEN2:
+        case LLM_ARCH_RWKV7:
+        case LLM_ARCH_ARWKV7:
+            return true;
+        default:
+            return false;
+    }
+}
+
+bool llm_arch_is_hybrid(const llm_arch & arch) {
+    // TODO: There are currently no hybrid models! Once there are, this will be
+    // the place to identify them
+    switch (arch) {
+        default:
+            return false;
+    }
+}
5 changes: 5 additions & 0 deletions examples/talk-llama/llama-arch.h
@@ -151,6 +151,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,
     LLM_KV_ATTENTION_KEY_LENGTH_MLA,
     LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
+    LLM_KV_ATTENTION_LAYER_INDICES,

     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -193,6 +194,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_MASK_ID,
     LLM_KV_TOKENIZER_ADD_BOS,
     LLM_KV_TOKENIZER_ADD_EOS,
+    LLM_KV_TOKENIZER_ADD_SEP,
     LLM_KV_TOKENIZER_ADD_PREFIX,
     LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
     LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
@@ -439,3 +441,6 @@ const char * llm_arch_name(llm_arch arch);
 llm_arch llm_arch_from_string(const std::string & name);

 const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
+
+bool llm_arch_is_recurrent(const llm_arch & arch);
+bool llm_arch_is_hybrid (const llm_arch & arch);
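
Taken together, the llama-arch changes above give callers one place to ask whether an architecture needs recurrent (or, eventually, hybrid) state handling instead of the unified KV cache, which is presumably how the new llama-memory-recurrent.cpp and llama-memory-hybrid.cpp files get selected. Below is a minimal, self-contained C++ sketch of that dispatch pattern; the llm_arch_sketch and memory_kind enums and the choose_memory helper are hypothetical illustrations, not part of the llama.cpp API, and the architecture list is abbreviated.

// Hypothetical sketch: routing an architecture to a memory implementation
// via predicates shaped like llm_arch_is_recurrent/llm_arch_is_hybrid.
// All names here are illustrative, not llama.cpp API.
#include <cstdio>

enum llm_arch_sketch { ARCH_LLAMA, ARCH_MAMBA, ARCH_RWKV6 }; // abbreviated

static bool arch_is_recurrent(llm_arch_sketch arch) {
    switch (arch) {
        case ARCH_MAMBA:
        case ARCH_RWKV6:
            return true;
        default:
            return false;
    }
}

static bool arch_is_hybrid(llm_arch_sketch) {
    return false; // mirrors llm_arch_is_hybrid: no hybrid architectures yet
}

enum memory_kind { MEM_KV_UNIFIED, MEM_RECURRENT, MEM_HYBRID };

static memory_kind choose_memory(llm_arch_sketch arch) {
    if (arch_is_hybrid(arch))    return MEM_HYBRID;    // llama-memory-hybrid.cpp
    if (arch_is_recurrent(arch)) return MEM_RECURRENT; // llama-memory-recurrent.cpp
    return MEM_KV_UNIFIED;                             // llama-kv-cache-unified.cpp
}

int main() {
    printf("MAMBA -> %d (recurrent)\n", choose_memory(ARCH_MAMBA));
    printf("LLAMA -> %d (kv unified)\n", choose_memory(ARCH_LLAMA));
    return 0;
}

Keeping the predicates next to the architecture table in llama-arch.cpp means the memory code never needs its own per-architecture switch statements.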