Skip to content

Commit 8ddaf25

Browse files
committed
Fix some state regressions... still wip
1 parent: 6942c85 · commit: 8ddaf25

File tree

3 files changed

+7
-7
lines changed

3 files changed

+7
-7
lines changed

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
154154

155155
if (!ggml_is_quantized(t->type)) {
156156
uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
157-
ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
157+
ggml_print_tensor(data, t->type, t->ne, t->nb, 8);
158158
}
159159

160160
return true;

examples/model-conversion/scripts/causal/run-org-model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,15 +185,16 @@ def fn(_m, input, output):
185185
# of using AutoModelForCausalLM.
186186
print(f"Model class: {model.__class__.__name__}")
187187

188+
device = next(model.parameters()).device
188189
prompt = "Hello, my name is"
189-
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
190+
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
190191

191192
print(f"Input tokens: {input_ids}")
192193
print(f"Input text: {repr(prompt)}")
193194
print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}")
194195

195196
with torch.no_grad():
196-
outputs = model(input_ids.to("cuda"))
197+
outputs = model(input_ids)
197198
logits = outputs.logits
198199

199200
# Extract logits for the last token (next token prediction)

src/models/llm_build_qwen3next.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -528,10 +528,9 @@ ggml_tensor * llm_build_qwen3next::build_qwen3next_linear_attn_layer(llm_graph_i
528528
(conv_kernel_size - 1) * ggml_element_size(conv_output));
529529
cb(conv_output_no_padding, "conv_output_no_padding", il);
530530

531-
// Take only the last n_seq_tokens values
532-
ggml_tensor * conv_output_proper = ggml_view_4d(ctx0, conv_output_no_padding, n_seq_tokens, conv_output_no_padding->ne[1],
533-
conv_output_no_padding->ne[2], conv_output_no_padding->ne[3], conv_output_no_padding->nb[1],
534-
conv_output_no_padding->nb[2], conv_output_no_padding->nb[3], (conv_output_no_padding->ne[0] - n_seq_tokens) * ggml_element_size(conv_output_no_padding));
531+
// Take only the first n_seq_tokens values
532+
ggml_tensor * conv_output_proper = ggml_view_4d(ctx0, conv_output_no_padding, n_seq_tokens, conv_output_no_padding->ne[1], conv_output_no_padding->ne[2], conv_output_no_padding->ne[3],
533+
conv_output_no_padding->nb[1], conv_output_no_padding->nb[2], conv_output_no_padding->nb[3], 0);
535534
cb(conv_output_proper, "conv_output_proper", il);
536535

537536
conv_output_proper = ggml_permute(ctx0, conv_output_proper, 0, 1, 3, 2);

0 commit comments

Comments (0)