Merged · Changes from all commits (38 commits)
722a963
ggml-cpu : remove stdlib include from repack.cpp (ggml/1276)
danbev Jul 21, 2025
0b441fe
cmake : fix usage issues (ggml/1257)
dg0yt Jul 22, 2025
682df28
Vulkan: Fix fprintf format-security warning (llama/14770)
0cc4m Jul 19, 2025
0c949db
vulkan: Add logging for bf16 features to ggml_vk_print_gpu_info (#132…
Peter0x44 Jul 19, 2025
3accff3
ggml: adds CONV_2D op and direct GEMM Vulkan implementation (llama/14…
etasnadi Jul 19, 2025
cb03991
vulkan/cuda: Fix im2col when KW!=KH (llama/14789)
jeffbolznv Jul 21, 2025
1d1d640
kleidiai: add support for get_rows (llama/14676)
chaxu01 Jul 21, 2025
06c74b3
sycl: Fix im2col (llama/14797)
Rbiessy Jul 21, 2025
4242186
opencl: add conv2d kernel (llama/14403)
rmatif Jul 21, 2025
444a0fe
opencl: fix `im2col` when `KW!=KH` (llama/14803)
CISC Jul 21, 2025
d7494d5
cuda: remove linking to cublasLt (llama/14790)
yeahdongcn Jul 21, 2025
bbaaa93
opencl: remove unreachable `return` (llama/14806)
lhez Jul 22, 2025
0e5770e
cuda : implement bf16 cpy ops and enable bf16 cont (llama/14763)
CISC Jul 22, 2025
7162f92
vulkan: fix rms_norm_mul to handle broadcasting dim0 (llama/14817)
jeffbolznv Jul 22, 2025
c193044
CUDA: add fused rms norm (llama/14800)
am17an Jul 23, 2025
88853c4
CANN: weight format to NZ for Ascend310P3 (llama/14407)
tqgy6 Jul 23, 2025
6ca9a0e
ggml: fix loongarch quantize_row_q8_1 error (llama/14827)
lixing-star Jul 23, 2025
c137464
CUDA: fix quantized KV cache + multiple sequences (llama/14822)
JohannesGaessler Jul 23, 2025
10d2a51
CUDA: fix compilation with GGML_CUDA_F16 (llama/14837)
JohannesGaessler Jul 23, 2025
9119c3c
CUDA: fix overflow in FA, tune performance (llama/14840)
JohannesGaessler Jul 23, 2025
510b3aa
sycl: fix undefined variable in work group size check (llama/14843)
djeong20 Jul 24, 2025
3a814b9
metal : fix fusion across different encoders (llama/14849)
ggerganov Jul 24, 2025
805c890
sycl: fixed semantics of block offset calculation (llama/14814)
Alcpz Jul 24, 2025
f8122d2
cmake : Indent ggml-config.cmake (ggml/1310)
dg0yt Jul 24, 2025
e577bb1
musa: upgrade musa sdk to rc4.2.0 (llama/14498)
yeahdongcn Jul 24, 2025
2feb28a
sched : fix multiple evaluations of the same graph with pipeline para…
slaren Jul 25, 2025
0e5d0ee
rpc : check for null buffers in get/set/copy tensor endpoints (llama/…
struct Jul 25, 2025
58942e7
ggml : remove invalid portPos specifiers from dot files (llama/14838)
ORippler Jul 25, 2025
5fdfe3b
opencl: add fused `rms_norm_mul` (llama/14841)
lhez Jul 25, 2025
cb0a47b
metal: SSM_SCAN performance (llama/14743)
gabe-l-hart Jul 25, 2025
7beedb1
ggml-cpu : disable GGML_NNPA by default due to instability (llama/14880)
taronaeo Jul 25, 2025
ef7a7f9
musa: fix build warnings (unused variable) (llama/14869)
yeahdongcn Jul 26, 2025
5962f89
CANN: Implement GLU ops (llama/14884)
hipudding Jul 26, 2025
0d5bf5e
HIP: Enable Matrix cores for MMQ Kernels, Enable stream-K for CDNA 3 …
deepsek Jul 26, 2025
4297312
vulkan: skip empty set_rows to avoid invalid API usage (llama/14860)
jeffbolznv Jul 27, 2025
6ef17cd
vulkan : add fp16 support for the conv_2d kernel (llama/14872)
Green-Sky Jul 27, 2025
c189a3c
sync : ggml
ggerganov Jul 28, 2025
8f48565
talk-llama : sync llama.cpp
ggerganov Jul 28, 2025
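Several of the synced kernel commits above (the Vulkan `rms_norm_mul` broadcasting fix, the OpenCL fused `rms_norm_mul`, and the CUDA fused RMS norm) target the same fused pattern: RMS-normalize a row and multiply it elementwise by a second tensor in a single pass. The actual GPU kernels are not part of this diff; the snippet below is only a minimal scalar C++ sketch of that pattern, with `rms_norm_mul_row` as a hypothetical name, to make the commit titles concrete.

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical scalar reference of the fused rms_norm + mul pattern that the
// synced Vulkan/OpenCL/CUDA kernels implement on-device:
//   y[i] = (x[i] / sqrt(mean(x^2) + eps)) * w[i]
static void rms_norm_mul_row(const float * x, const float * w, float * y,
                             size_t n, float eps) {
    float sum_sq = 0.0f;
    for (size_t i = 0; i < n; ++i) {
        sum_sq += x[i] * x[i];
    }
    const float scale = 1.0f / std::sqrt(sum_sq / (float) n + eps);
    for (size_t i = 0; i < n; ++i) {
        y[i] = x[i] * scale * w[i]; // norm and mul fused into a single loop
    }
}

int main() {
    std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};
    std::vector<float> w = {0.5f, 0.5f, 0.5f, 0.5f};
    std::vector<float> y(x.size());
    rms_norm_mul_row(x.data(), w.data(), y.data(), x.size(), 1e-6f);
    for (float v : y) {
        std::printf("%f\n", v);
    }
    return 0;
}
```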
106 changes: 106 additions & 0 deletions examples/talk-llama/llama-arch.cpp
@@ -34,6 +34,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_PHI3, "phi3" },
{ LLM_ARCH_PHIMOE, "phimoe" },
{ LLM_ARCH_PLAMO, "plamo" },
{ LLM_ARCH_PLAMO2, "plamo2" },
{ LLM_ARCH_CODESHELL, "codeshell" },
{ LLM_ARCH_ORION, "orion" },
{ LLM_ARCH_INTERNLM2, "internlm2" },
@@ -67,6 +68,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_JAIS, "jais" },
{ LLM_ARCH_NEMOTRON, "nemotron" },
{ LLM_ARCH_EXAONE, "exaone" },
{ LLM_ARCH_EXAONE4, "exaone4" },
{ LLM_ARCH_RWKV6, "rwkv6" },
{ LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
{ LLM_ARCH_RWKV7, "rwkv7" },
@@ -81,9 +83,11 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_DOTS1, "dots1" },
{ LLM_ARCH_ARCEE, "arcee" },
{ LLM_ARCH_ERNIE4_5, "ernie4_5" },
{ LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
{ LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
{ LLM_ARCH_SMOLLM3, "smollm3" },
{ LLM_ARCH_LFM2, "lfm2" },
{ LLM_ARCH_DREAM, "dream" },
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};

@@ -784,6 +788,36 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_PLAMO2,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
{ LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
{ LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
{ LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
{ LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
},
},
{
LLM_ARCH_CODESHELL,
{
@@ -1477,6 +1511,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_EXAONE4,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
}
},
{
LLM_ARCH_RWKV6,
{
@@ -1793,6 +1847,31 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_ERNIE4_5_MOE,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
},
},
{
LLM_ARCH_HUNYUAN_MOE,
{
@@ -1854,6 +1933,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
}
},
{
LLM_ARCH_DREAM,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_UNKNOWN,
{
@@ -2094,10 +2190,20 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
switch (arch) {
case LLM_ARCH_JAMBA:
case LLM_ARCH_FALCON_H1:
case LLM_ARCH_PLAMO2:
case LLM_ARCH_GRANITE_HYBRID:
case LLM_ARCH_LFM2:
return true;
default:
return false;
}
}

bool llm_arch_is_diffusion(const llm_arch & arch) {
switch (arch) {
case LLM_ARCH_DREAM:
return true;
default:
return false;
}
}
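The per-architecture tables added above map each `llm_tensor` enum to a printf-style GGUF tensor name, with `%d` standing for the block index. llama.cpp resolves these through its own helpers, which are not shown in this diff; the sketch below uses a hypothetical `tensor_name` function and a trimmed-down map only to illustrate how such a format string expands for a given layer.

```cpp
#include <cstdio>
#include <map>
#include <string>

// Trimmed-down stand-in for the enum used by the real tables; the actual
// definitions live in examples/talk-llama/llama-arch.h.
enum llm_tensor { LLM_TENSOR_ATTN_Q, LLM_TENSOR_FFN_UP };

// Hypothetical helper: expand a "blk.%d.attn_q"-style pattern for one block.
static std::string tensor_name(const std::map<llm_tensor, const char *> & names,
                               llm_tensor t, int block) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), names.at(t), block);
    return buf;
}

int main() {
    const std::map<llm_tensor, const char *> example_names = {
        { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
        { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
    };
    std::printf("%s\n", tensor_name(example_names, LLM_TENSOR_ATTN_Q, 3).c_str()); // blk.3.attn_q
    std::printf("%s\n", tensor_name(example_names, LLM_TENSOR_FFN_UP, 3).c_str()); // blk.3.ffn_up
    return 0;
}
```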
5 changes: 5 additions & 0 deletions examples/talk-llama/llama-arch.h
@@ -38,6 +38,7 @@ enum llm_arch {
LLM_ARCH_PHI3,
LLM_ARCH_PHIMOE,
LLM_ARCH_PLAMO,
LLM_ARCH_PLAMO2,
LLM_ARCH_CODESHELL,
LLM_ARCH_ORION,
LLM_ARCH_INTERNLM2,
@@ -71,6 +72,7 @@ enum llm_arch {
LLM_ARCH_JAIS,
LLM_ARCH_NEMOTRON,
LLM_ARCH_EXAONE,
LLM_ARCH_EXAONE4,
LLM_ARCH_RWKV6,
LLM_ARCH_RWKV6QWEN2,
LLM_ARCH_RWKV7,
@@ -85,9 +87,11 @@ enum llm_arch {
LLM_ARCH_DOTS1,
LLM_ARCH_ARCEE,
LLM_ARCH_ERNIE4_5,
LLM_ARCH_ERNIE4_5_MOE,
LLM_ARCH_HUNYUAN_MOE,
LLM_ARCH_SMOLLM3,
LLM_ARCH_LFM2,
LLM_ARCH_DREAM,
LLM_ARCH_UNKNOWN,
};

@@ -478,3 +482,4 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);

bool llm_arch_is_recurrent(const llm_arch & arch);
bool llm_arch_is_hybrid (const llm_arch & arch);
bool llm_arch_is_diffusion(const llm_arch & arch);
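`llm_arch_is_diffusion` joins the existing `llm_arch_is_recurrent` and `llm_arch_is_hybrid` predicates; in the .cpp change above it currently returns true only for `LLM_ARCH_DREAM`. The sketch below is a hedged illustration of how a caller might branch on these predicates; the stand-in enum, the simplified predicate bodies, and `describe_arch` are illustrative assumptions, not llama.cpp API.

```cpp
#include <cstdio>

// Minimal stand-ins so the sketch is self-contained; the real declarations are
// in examples/talk-llama/llama-arch.h and cover many more architectures.
enum llm_arch { LLM_ARCH_LLAMA, LLM_ARCH_PLAMO2, LLM_ARCH_DREAM };

static bool llm_arch_is_hybrid   (const llm_arch & arch) { return arch == LLM_ARCH_PLAMO2; }
static bool llm_arch_is_diffusion(const llm_arch & arch) { return arch == LLM_ARCH_DREAM;  }

// Hypothetical caller: choose a code path based on the architecture predicates.
static void describe_arch(const llm_arch & arch) {
    if (llm_arch_is_diffusion(arch)) {
        std::printf("diffusion architecture: needs a diffusion-style decode loop\n");
    } else if (llm_arch_is_hybrid(arch)) {
        std::printf("hybrid architecture: attention plus SSM state handling\n");
    } else {
        std::printf("standard autoregressive transformer\n");
    }
}

int main() {
    describe_arch(LLM_ARCH_DREAM);   // diffusion
    describe_arch(LLM_ARCH_PLAMO2);  // hybrid
    describe_arch(LLM_ARCH_LLAMA);   // standard
    return 0;
}
```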