Commit 05329e7

Merge qwen3_next into testing-branch10 (took qwen3_next version for src/llama-model.cpp)
1 parent d89b381 commit 05329e7

202 files changed (+16196, -13249 lines)


common/arg.cpp

Lines changed: 7 additions & 0 deletions

```diff
@@ -1951,6 +1951,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.kv_unified = true;
         }
     ).set_env("LLAMA_ARG_KV_SPLIT"));
+    add_opt(common_arg(
+        {"--dump-cache"},
+        "dump cache statistics after each token generation",
+        [](common_params & params) {
+            params.dump_cache = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
```

common/common.h

Lines changed: 2 additions & 0 deletions

```diff
@@ -398,6 +398,8 @@ struct common_params {

     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
+
+    bool dump_cache = false; // dump cache statistics after each token

     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;

```

convert_hf_to_gguf.py

Lines changed: 30 additions & 0 deletions

```diff
@@ -3826,6 +3826,36 @@ def set_vocab(self):
         super().set_vocab()


+@ModelBase.register("Qwen3NextForCausalLM")
+class Qwen3NextModel(Qwen3MoeModel):
+    model_arch = gguf.MODEL_ARCH.QWEN3NEXT
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_ssm_conv_kernel(self.find_hparam(["linear_conv_kernel_dim"]))
+        self.gguf_writer.add_ssm_state_size(self.find_hparam(["linear_key_head_dim"]))
+        self.gguf_writer.add_ssm_group_count(self.find_hparam(["linear_num_key_heads"]))
+        self.gguf_writer.add_ssm_time_step_rank(self.find_hparam(["linear_num_value_heads"]))
+        self.gguf_writer.add_ssm_inner_size(self.find_hparam(['linear_value_head_dim']) * self.find_hparam(['linear_num_value_heads']))
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+        self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.25)))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("mtp"):
+            return []  # ignore MTP layers for now
+        if name.endswith(".A_log"):
+            data_torch = -torch.exp(data_torch)
+        elif name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+        elif "conv1d" in name:
+            data_torch = data_torch.squeeze()
+        elif name.endswith("norm.weight") and not name.endswith("linear_attn.norm.weight"):
+            data_torch = data_torch + 1
+
+        yield from Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)
+
+
 @ModelBase.register("GPT2LMHeadModel")
 class GPT2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT2
```
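The new converter class is selected through its `@ModelBase.register("Qwen3NextForCausalLM")` registration when the checkpoint's `architectures` field matches. A sketch of how it would be exercised, assuming a local Hugging Face checkpoint; the directory and output paths are placeholders:

```sh
# Placeholder paths; converts a local Qwen3-Next HF checkpoint to GGUF.
# Qwen3NextModel handles the SSM hparams and tensor rewrites shown above.
python convert_hf_to_gguf.py ./Qwen3-Next-80B-A3B-Instruct \
    --outfile ./qwen3-next.gguf --outtype f16
```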

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -154,7 +154,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {

     if (!ggml_is_quantized(t->type)) {
         uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
-        ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
+        ggml_print_tensor(data, t->type, t->ne, t->nb, 8);
     }

     return true;
```
examples/model-conversion/scripts/causal/run-converted-model.sh

Lines changed: 7 additions & 1 deletion

```diff
@@ -4,6 +4,11 @@ set -e

 # First try command line argument, then environment variable, then file
 CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"
+MODEL_TESTING_PROMPT="${2:-"$MODEL_TESTING_PROMPT"}"
+
+if [ -z "$MODEL_TESTING_PROMPT" ]; then
+    MODEL_TESTING_PROMPT="Hello, my name is"
+fi

 # Final check if we have a model path
 if [ -z "$CONVERTED_MODEL" ]; then
@@ -14,7 +19,8 @@ if [ -z "$CONVERTED_MODEL" ]; then
 fi

 echo $CONVERTED_MODEL
+echo $MODEL_TESTING_PROMPT

 cmake --build ../../build --target llama-logits -j8

-../../build/bin/llama-logits -m "$CONVERTED_MODEL" "Hello, my name is"
+../../build/bin/llama-logits -m "$CONVERTED_MODEL" "$MODEL_TESTING_PROMPT"
```
