
Commit b7f5024

convert : correct gemma 3n conversion

1 parent caf5681 commit b7f5024

File tree

2 files changed: +5 -2 lines

gguf-py/gguf/gguf_writer.py

Lines changed: 2 additions & 2 deletions
@@ -714,8 +714,8 @@ def add_max_alibi_bias(self, bias: float) -> None:
     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)

-    def add_shared_kv_layers(self, value: float) -> None:
-        self.add_float32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)

     def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
         self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)

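The shared-KV-layer count is an integer, so it is now written as a uint32 key instead of a float32 one. Below is a minimal sketch of how a conversion script could record this metadata after the change; the arch string, the layer count, and the exact write sequence are illustrative assumptions, not the actual convert_hf_to_gguf.py flow.

```python
# Minimal sketch (not the real conversion flow): the shared-KV-layer count is
# an integer, so add_shared_kv_layers now stores it via add_uint32.
import gguf

writer = gguf.GGUFWriter("gemma-3n-sketch.gguf", "gemma3n")  # arch string assumed
writer.add_shared_kv_layers(10)  # illustrative count, stored as uint32 after this commit
writer.add_sliding_window_pattern([True, True, True, True, False])  # unchanged: array of bools

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()  # no tensors added in this sketch
writer.close()
```
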
src/llama-quant.cpp

Lines changed: 3 additions & 0 deletions
@@ -894,6 +894,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
             new_type = params->token_embedding_type;
         }
+        if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "per_layer_token_embd.weight") == 0) {
+            new_type = params->token_embedding_type;
+        }
         if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
             new_type = params->output_tensor_type;
         }

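With this change, Gemma 3n's per-layer token embedding tensor honours the same token-embedding type override as the regular token embedding. A small Python sketch of that decision logic follows, mirroring the C++ branch above for illustration only; the helper name, its arguments, and the use of None as "no override" are assumptions, not llama.cpp API.

```python
# Illustrative mirror of the per-tensor overrides in llama_model_quantize_impl.
# The function is hypothetical; only the tensor names and the precedence follow
# the C++ code above.
def override_tensor_type(tensor_name: str,
                         default_type: str,
                         token_embedding_type: str | None = None,
                         output_tensor_type: str | None = None) -> str:
    # Both the regular and the Gemma 3n per-layer token embeddings now honour
    # the token-embedding override.
    if token_embedding_type is not None and tensor_name in (
        "token_embd.weight",
        "per_layer_token_embd.weight",
    ):
        return token_embedding_type
    if output_tensor_type is not None and tensor_name == "output.weight":
        return output_tensor_type
    return default_type


# Usage example: the per-layer embedding picks up the override instead of the default.
assert override_tensor_type("per_layer_token_embd.weight", "Q4_K",
                            token_embedding_type="Q8_0") == "Q8_0"
```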