
Commit 7c7c594
Merge branch 'concedo_experimental' into crokeso
2 parents: 5c75115 + b0b7a07

17 files changed, +720 -400 lines

convert_hf_to_gguf.py

Lines changed: 76 additions & 3 deletions
@@ -1173,6 +1173,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
+            # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
+            res = "exaone4"

         if res is None:
             logger.warning("\n")
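
For context (not part of the diff): get_vocab_base_pre fingerprints a tokenizer by hashing the token IDs it produces for a fixed probe string, and the chkhsh branches above map known fingerprints to a pre-tokenizer name such as "exaone4". Below is a minimal sketch of that idea, assuming a SHA-256 over the stringified token IDs; the probe text is a stand-in, so only the real probe used by the script reproduces the hash recorded in this commit.

# Illustration only: fingerprint a Hugging Face tokenizer and look it up.
# The probe string is a placeholder; the conversion script uses a specific
# multilingual probe, so this hash only matches when that exact probe is used.
from hashlib import sha256
from transformers import AutoTokenizer

KNOWN_PRE_TOKENIZERS = {
    "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb": "exaone4",  # added by this commit
}

def tokenizer_fingerprint(model_id: str, probe: str = "placeholder probe text") -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    token_ids = tokenizer.encode(probe)
    return sha256(str(token_ids).encode()).hexdigest()

res = KNOWN_PRE_TOKENIZERS.get(tokenizer_fingerprint("LGAI-EXAONE/EXAONE-4.0-32B"))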
@@ -3240,11 +3243,12 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
         self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
         self.gguf_writer.add_leading_dense_block_count(self.hparams["moe_layer_start_index"])
-        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
         if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
             self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
-        if (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
-            self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
+        if (shared_expert_count := self.hparams.get('moe_num_shared_experts')) is not None:
+            self.gguf_writer.add_expert_shared_count(shared_expert_count)
+            if shared_expert_count > 0 and (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
+                self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # Modify correction bias name as in DeepseekV2
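
A quick illustration (not from the commit) of what the reworked shared-expert branch writes, using made-up hparams: the shared-expert count is now emitted on its own, and the shared-expert feed-forward length intermediate_size // num_key_value_heads is only written when that count is positive and both fields are present.

# Sketch with hypothetical hparams; gguf_writer calls replaced by prints.
hparams = {
    "moe_num_shared_experts": 2,   # hypothetical
    "intermediate_size": 12288,    # hypothetical
    "num_key_value_heads": 4,      # hypothetical
}

shared_expert_count = hparams.get("moe_num_shared_experts")
if shared_expert_count is not None:
    print("expert_shared_count =", shared_expert_count)              # 2
    if shared_expert_count > 0:
        ffn_len = hparams["intermediate_size"] // hparams["num_key_value_heads"]
        print("expert_shared_feed_forward_length =", ffn_len)        # 3072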
@@ -7109,6 +7113,75 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))


+@ModelBase.register("Exaone4ForCausalLM")
+class Exaone4Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.EXAONE4
+
+    def set_vocab(self):
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        if hparams.get("sliding_window") is not None:
+            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+            if "layer_types" in hparams:
+                self.gguf_writer.add_sliding_window_pattern([t == "sliding_attention" for t in hparams["layer_types"]])
+            elif "sliding_window_pattern" in hparams:
+                sliding_window_pattern = []
+                if isinstance(hparams["sliding_window_pattern"], str): # e.g. LLLG
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append(hparams["sliding_window_pattern"][i % len(hparams["sliding_window_pattern"])] == "L")
+                if isinstance(hparams["sliding_window_pattern"], int): # e.g. 4
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append((i + 1) % hparams["sliding_window_pattern"] != 0)
+                if len(sliding_window_pattern) == hparams["num_hidden_layers"]:
+                    self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern)
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 10_000.0)
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 16.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+
+
 @ModelBase.register("GraniteForCausalLM")
 class GraniteModel(LlamaModel):
     """Conversion for IBM's GraniteForCausalLM"""

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -129,6 +129,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
     {"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
     {"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
+    {"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", },
 ]

 # some models are known to be broken upstream, so we will skip them as exceptions

gguf-py/gguf/constants.py

Lines changed: 19 additions & 0 deletions
@@ -356,6 +356,7 @@ class MODEL_ARCH(IntEnum):
     JAIS = auto()
     NEMOTRON = auto()
     EXAONE = auto()
+    EXAONE4 = auto()
     GRANITE = auto()
     GRANITE_MOE = auto()
     GRANITE_HYBRID = auto()
@@ -674,6 +675,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.JAIS: "jais",
     MODEL_ARCH.NEMOTRON: "nemotron",
     MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.EXAONE4: "exaone4",
     MODEL_ARCH.GRANITE: "granite",
     MODEL_ARCH.GRANITE_MOE: "granitemoe",
     MODEL_ARCH.GRANITE_HYBRID: "granitehybrid",
@@ -2222,6 +2224,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.EXAONE4: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_POST_NORM,
+    ],
     MODEL_ARCH.GRANITE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

gpttype_adapter.cpp

Lines changed: 9 additions & 7 deletions
@@ -3311,6 +3311,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_int
         }
         else
         {
+            media_composite_image_signature = ""; //force invalidate
             printf("\nWarning: Vision Image excluded - Context size too low or not enough clip tokens! (needed %d)\nImage will be IGNORED! You probably want to relaunch with a larger context size!\n",cliptokensneeded);
         }
         media_objects[i].mediachunks.push_back(chunk);
@@ -3364,6 +3365,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_int
         }
         else
         {
+            media_composite_image_signature = ""; //force invalidate
             printf("\nWarning: Audio Embd excluded - Context size too low or not enough clip tokens! (needed %d)\nAudio will be IGNORED! You probably want to relaunch with a larger context size!\n",cliptokensneeded);
         }

@@ -3570,7 +3572,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         media_composite_image_signature = new_media_composite;
         if(debugmode==1 && !is_quiet)
         {
-            printf("\nLLAVA images changed, existing cache invalidated");
+            printf("\nAttached media changed, existing multimodal cache invalidated");
         }
         media_data_changed = true;
     }
@@ -3775,7 +3777,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     {
         if(last_media_mem.size() + kcpp_data->n_predict + 4 > nctx)
         {
-            printf("\nWarning: Too many LLaVA tokens, max context exceeded! They will be ignored!\n");
+            printf("\nWarning: Too many multimodal tokens, max context exceeded! They will be ignored!\n");
         }
         else
         {
@@ -4629,7 +4631,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     {
         PrepareMediaEmbds(nctx, media_intro);
         media_embds_built = true;
-        printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
+        printf("\nSomehow media embeds was not prepared (maybe no fast forward), rebuilding it...\n");
     }

     //if partial batch, dispatch existing first
@@ -4664,11 +4666,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     auto evr = llama_decode(llama_ctx_v4, batch.batch);
     if(evr!=0)
     {
-        printf("\nError when appending llava intro: %d\n",evr);
+        printf("\nError when appending media intro: %d\n",evr);
     }
     else
     {
-        printf("\rProcessing LLaVa Intro (%d tokens)",introsize);
+        printf("\rProcessing Media Intro (%d tokens)",introsize);
     }
     n_past += introsize;
     llavatokensevaled += introsize;
@@ -4703,7 +4705,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if(!err)
     {
         media_composite_image_signature = ""; //force invalidate
-        fprintf(stderr, "\nFailed to eval llava image at %d!\n",n_past);
+        fprintf(stderr, "\nFailed to eval media tokens at %d!\n",n_past);
         output.text = nullptr;
         output.status = 0;
         output.prompt_tokens = output.completion_tokens = 0;
@@ -4733,7 +4735,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if(llavatokenscounted!=llavatokensevaled)
     {
         media_composite_image_signature = ""; //force invalidate
-        fprintf(stderr, "\nLLAVA image tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
+        fprintf(stderr, "\nMedia tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
         output.text = nullptr;
         output.status = 0;
         output.prompt_tokens = output.completion_tokens = 0;
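
The two added media_composite_image_signature = "" assignments follow a simple rule: if any attached image or audio had to be dropped or failed to evaluate, clear the cached signature so the embeddings are rebuilt on the next request rather than a stale cache being reused. A small Python sketch of that invalidate-on-failure pattern (names are placeholders, not KoboldCpp's actual API):

from hashlib import sha256

media_signature = ""   # stands in for media_composite_image_signature
prepared_embds = []    # stands in for the cached media embeddings

def prepare_media(items, ctx_budget):
    """Rebuild embeddings when the media changed; invalidate on any failure."""
    global media_signature, prepared_embds
    new_sig = sha256(b"".join(items)).hexdigest()
    if new_sig == media_signature:
        return  # unchanged media: reuse the cached embeddings
    prepared_embds = []
    all_ok = True
    for item in items:
        if len(item) > ctx_budget:   # analogous to "not enough clip tokens"
            all_ok = False
            print("Warning: media item excluded, it will be ignored")
            continue
        prepared_embds.append(item)  # placeholder for the real embedding step
    # Record the signature only when everything fit; otherwise leave it empty
    # ("force invalidate") so the next request retries rather than reusing a
    # cache that silently dropped an image or audio clip.
    media_signature = new_sig if all_ok else ""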

kcpp_adapters/AutoGuess.json

Lines changed: 34 additions & 30 deletions
@@ -24,15 +24,15 @@
         "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
     }
 }, {
-    "search": ["<|im_start|>assistant", "<|im_end|>"],
-    "name": "ChatML (Generic).",
+    "search": ["<|im_user|>user<|im_middle|>", "<|im_assistant|>assistant<|im_middle|>", "<|im_end|>"],
+    "name": "ChatML (Kimi).",
     "adapter": {
-        "system_start": "<|im_start|>system\n",
-        "system_end": "<|im_end|>\n",
-        "user_start": "<|im_start|>user\n",
-        "user_end": "<|im_end|>\n",
-        "assistant_start": "<|im_start|>assistant\n",
-        "assistant_end": "<|im_end|>\n"
+        "system_start": "<|im_system|>system<|im_middle|>",
+        "system_end": "<|im_end|>",
+        "user_start": "<|im_user|>user<|im_middle|>",
+        "user_end": "<|im_end|>",
+        "assistant_start": "<|im_assistant|>assistant<|im_middle|>",
+        "assistant_end": "<|im_end|>"
     }
 }, {
     "search": ["System role not supported", "<start_of_turn>"],
@@ -111,17 +111,6 @@
         "assistant_start": "[/INST]",
         "assistant_end": "</s>"
     }
-}, {
-    "search": ["[/INST]"],
-    "name": "Mistral (Generic)",
-    "adapter": {
-        "system_start": "[INST]",
-        "system_end": "[/INST]\n",
-        "user_start": "[INST]",
-        "user_end": "",
-        "assistant_start": "[/INST]\n",
-        "assistant_end": "</s>"
-    }
 }, {
     "search": ["[gMASK]<sop>"],
     "name": "GLM-4",
@@ -188,17 +177,6 @@
         "assistant_start": "<|bom|><|assistant|>",
         "assistant_end": "<|eom|>"
     }
-}, {
-    "search": ["<|im_start|>assistant<|im_middle|>", "<|im_assistant|>assistant<|im_middle|>", "<|im_end|>"],
-    "name": "ChatML (Kimi).",
-    "adapter": {
-        "system_start": "<|im_start|>system<|im_middle|>",
-        "system_end": "<|im_end|>",
-        "user_start": "<|im_start|>user<|im_middle|>",
-        "user_end": "<|im_end|>",
-        "assistant_start": "<|im_start|>assistant<|im_middle|>",
-        "assistant_end": "<|im_end|>"
-    }
 }, {
     "search": ["<|userprompt|>", "<|endofuserprompt|>", "<|response|>", "<|endofresponse|>"],
     "name": "Dots",
@@ -221,5 +199,31 @@
         "assistant_start": "ASSISTANT: ",
         "assistant_end": "</s>\n"
     }
+},
+
+
+
+{
+    "search": ["[/INST]"],
+    "name": "Mistral (Generic)",
+    "adapter": {
+        "system_start": "[INST]",
+        "system_end": "[/INST]\n",
+        "user_start": "[INST]",
+        "user_end": "",
+        "assistant_start": "[/INST]\n",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["<|im_start|>assistant", "<|im_end|>"],
+    "name": "ChatML (Generic).",
+    "adapter": {
+        "system_start": "<|im_start|>system\n",
+        "system_end": "<|im_end|>\n",
+        "user_start": "<|im_start|>user\n",
+        "user_end": "<|im_end|>\n",
+        "assistant_start": "<|im_start|>assistant\n",
+        "assistant_end": "<|im_end|>\n"
+    }
 }
 ]
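
Note that the Mistral (Generic) and ChatML (Generic) entries were moved to the end of the list rather than deleted. Assuming AutoGuess is scanned top-down with the first matching entry winning, and that an entry matches when all of its "search" markers appear in the model's chat template (my reading of the diff, not verified against the loader), the reordering keeps broad catch-alls like "[/INST]" and "<|im_start|>assistant" from shadowing more specific templates such as the Kimi variant. A small sketch of that first-match selection:

import json

def pick_adapter(chat_template, autoguess_path="kcpp_adapters/AutoGuess.json"):
    # Illustrative only: scan entries in file order and return the first whose
    # search markers all occur in the chat template, so generic entries must
    # come after specific ones.
    with open(autoguess_path, encoding="utf-8") as f:
        entries = json.load(f)
    for entry in entries:
        if all(marker in chat_template for marker in entry["search"]):
            return entry["name"], entry["adapter"]
    return None, None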
