
Commit d04b4ee

merge not working
2 parents 8b6dfbd + 92ecdcc commit d04b4ee

File tree

13 files changed (+479 / -100 lines)


.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
@@ -448,7 +448,7 @@ jobs:
        shell: bash

      env:
-       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
        WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
        ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
      steps:

convert_hf_to_gguf.py

Lines changed: 21 additions & 0 deletions
@@ -308,6 +308,7 @@ def prepare_tensors(self):
                gguf.MODEL_TENSOR.TIME_MIX_LERP_FUSED,
                gguf.MODEL_TENSOR.POSNET_NORM1,
                gguf.MODEL_TENSOR.POSNET_NORM2,
+               gguf.MODEL_TENSOR.V_ENC_EMBD_POS,
            )
        )
        or not new_name.endswith(".weight")

@@ -2092,6 +2093,26 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        return super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("Llama4ForConditionalGeneration")
+class Llama4VisionModel(VisionModel):
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.LLAMA4)
+        self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams["norm_eps"])
+        self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / self.hparams["pixel_shuffle_ratio"]))
+        assert self.hparams["hidden_act"] == "gelu"
+        self.gguf_writer.add_vision_use_gelu(True)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        if "multi_modal_projector" in name or "vision_model" in name:
+            # process vision tensors
+            if "positional_embedding_vlm" in name and ".weight" not in name:
+                name += ".weight"
+            return [(self.map_tensor_name(name), data_torch)]
+        return []
+
+
@ModelBase.register("Mistral3ForConditionalGeneration")
class Mistral3Model(LlamaModel):
    model_arch = gguf.MODEL_ARCH.LLAMA
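
As a side note on the new Llama4VisionModel.modify_tensors above, the standalone sketch below (illustrative only, not part of the converter; the language-model tensor name is an assumption added for contrast, while the projector and positional-embedding names come from the mapping tables later in this commit) shows which names are kept for the vision/projector output and how the bare positional embedding gets a ".weight" suffix:

# Illustrative sketch of the filtering done by Llama4VisionModel.modify_tensors.
names = [
    "language_model.model.layers.0.self_attn.q_proj.weight",  # assumed LM tensor -> dropped
    "multi_modal_projector.linear_1.weight",                   # projector tensor -> kept
    "vision_model.positional_embedding_vlm",                   # kept, ".weight" appended
]

for name in names:
    if "multi_modal_projector" in name or "vision_model" in name:
        if "positional_embedding_vlm" in name and ".weight" not in name:
            name += ".weight"
        print("keep:", name)  # the converter would then remap this via map_tensor_name()
    else:
        print("drop:", name)  # modify_tensors returns [] for these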

docs/multimodal.md

Lines changed: 3 additions & 0 deletions
@@ -74,4 +74,7 @@ NOTE: some models may require large context window, for example: `-c 8192`
(tool_name) -hf ggml-org/InternVL3-2B-Instruct-GGUF
(tool_name) -hf ggml-org/InternVL3-8B-Instruct-GGUF
(tool_name) -hf ggml-org/InternVL3-14B-Instruct-GGUF
+
+# Llama 4 Scout
+(tool_name) -hf ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF
```

ggml/include/ggml-opt.h

Lines changed: 2 additions & 0 deletions
@@ -128,6 +128,8 @@ extern "C" {
    // set gradients to zero, initilize loss, and optionally reset the optimizer
    GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);

+   GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated_statically
+
    // get underlying tensors that store data
    // if not using static graphs these pointers become invalid with the next call to ggml_opt_alloc
    GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor

ggml/src/ggml-opt.cpp

Lines changed: 5 additions & 0 deletions
@@ -576,6 +576,10 @@ void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer) {
    }
}

+bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx) {
+    return opt_ctx->static_graphs;
+}
+
struct ggml_tensor * ggml_opt_inputs(ggml_opt_context_t opt_ctx) {
    return opt_ctx->inputs;
}

@@ -842,6 +846,7 @@ void ggml_opt_epoch(
        int64_t                 idata_split,
        ggml_opt_epoch_callback callback_train,
        ggml_opt_epoch_callback callback_eval) {
+    GGML_ASSERT(ggml_opt_static_graphs(opt_ctx) && "ggml_opt_epoch requires static graphs");
    struct ggml_tensor * inputs = ggml_opt_inputs(opt_ctx);
    struct ggml_tensor * labels = ggml_opt_labels(opt_ctx);
    struct ggml_tensor * data   = ggml_opt_dataset_data(dataset);

ggml/src/ggml.c

Lines changed: 48 additions & 17 deletions
@@ -65,12 +65,17 @@
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
float ggml_table_f32_f16[1 << 16];

-#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
-    (!defined(TARGET_OS_TV) && !defined(TARGET_OS_WATCH))
+#if defined(__linux__) || \
+    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
+
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
+#if defined(__linux__)
+#include <sys/prctl.h>
+#endif

#if defined(__ANDROID__)
#include <unwind.h>

@@ -134,10 +139,36 @@ static void ggml_print_backtrace(void) {
    if (GGML_NO_BACKTRACE) {
        return;
    }
-    char attach[32];
-    snprintf(attach, sizeof(attach), "attach %d", getpid());
-    int pid = fork();
-    if (pid == 0) {
+#if defined(__linux__)
+    FILE * f = fopen("/proc/self/status", "r");
+    size_t size = 0;
+    char * line = NULL;
+    ssize_t length = 0;
+    while ((length = getline(&line, &size, f)) > 0) {
+        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
+                (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
+            // Already being debugged, and the breakpoint is the later abort()
+            free(line);
+            fclose(f);
+            return;
+        }
+    }
+    free(line);
+    fclose(f);
+    int lock[2] = { -1, -1 };
+    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
+#endif
+    const int parent_pid = getpid();
+    const int child_pid  = fork();
+    if (child_pid < 0) { // error
+        return;
+    } else if (child_pid == 0) { // child
+        char attach[32];
+        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
+#if defined(__linux__)
+        close(lock[1]);
+        (void) !read(lock[0], lock, 1);
+#endif
        // try gdb
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",

@@ -150,18 +181,18 @@ static void ggml_print_backtrace(void) {
        execlp("lldb", "lldb", "--batch",
            "-o", "bt",
            "-o", "quit",
-            "-p", attach,
+            "-p", &attach[sizeof("attach ") - 1],
            (char *) NULL);
-        exit(EXIT_FAILURE);
-    } else {
-        int wstatus;
-        waitpid(pid, &wstatus, 0);
-        if (WIFEXITED(wstatus)) {
-            if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
-                // gdb failed, fallback to backtrace_symbols
-                ggml_print_backtrace_symbols();
-            }
-        }
+        // gdb failed, fallback to backtrace_symbols
+        ggml_print_backtrace_symbols();
+        _Exit(0);
+    } else { // parent
+#if defined(__linux__)
+        prctl(PR_SET_PTRACER, child_pid);
+        close(lock[1]);
+        close(lock[0]);
+#endif
+        waitpid(child_pid, NULL, 0);
    }
}
#else
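
The hunk above first parses /proc/self/status and bails out when TracerPid is non-zero, i.e. when a debugger is already attached, before forking the gdb/lldb helper. A minimal standalone sketch of just that check (written in Python purely for illustration; the real implementation is the C code above):

def debugger_attached() -> bool:
    # Linux-only: a non-zero TracerPid in /proc/self/status means some process
    # (e.g. gdb) is already ptrace-attached to us.
    try:
        with open("/proc/self/status") as f:
            for line in f:
                if line.startswith("TracerPid:"):
                    return line.split()[1] != "0"
    except OSError:
        pass
    return False

if __name__ == "__main__":
    print("debugger attached:", debugger_attached())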

gguf-py/gguf/constants.py

Lines changed: 12 additions & 8 deletions
@@ -482,14 +482,15 @@ class MODEL_TENSOR(IntEnum):
    V_ENC_EMBD_CLS = auto()
    V_ENC_EMBD_PATCH = auto()
    V_ENC_EMBD_POS = auto()
+   V_ENC_INPUT_NORM = auto()
    V_ENC_ATTN_Q = auto()
    V_ENC_ATTN_Q_NORM = auto()
    V_ENC_ATTN_K = auto()
    V_ENC_ATTN_K_NORM = auto()
    V_ENC_ATTN_V = auto()
-   V_ENC_INPUT_NORM = auto()
-   V_ENC_OUTPUT = auto()
-   V_ENC_OUTPUT_NORM = auto()
+   V_ENC_ATTN_O = auto()
+   V_ENC_ATTN_O_NORM = auto()
+   V_ENC_POST_ATTN_NORM = auto()
    V_ENC_FFN_UP = auto()
    V_ENC_FFN_GATE = auto()
    V_ENC_FFN_DOWN = auto()

@@ -749,8 +750,9 @@ class MODEL_TENSOR(IntEnum):
    MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
    MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
    MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
-   MODEL_TENSOR.V_ENC_OUTPUT: "v.blk.{bid}.attn_out",
-   MODEL_TENSOR.V_ENC_OUTPUT_NORM: "v.blk.{bid}.ln2",
+   MODEL_TENSOR.V_ENC_ATTN_O: "v.blk.{bid}.attn_out",
+   MODEL_TENSOR.V_ENC_ATTN_O_NORM: "v.blk.{bid}.attn_out_norm",
+   MODEL_TENSOR.V_ENC_POST_ATTN_NORM: "v.blk.{bid}.ln2",
    MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
    MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
    MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",

@@ -785,14 +787,15 @@ class MODEL_TENSOR(IntEnum):
    MODEL_TENSOR.V_ENC_EMBD_CLS,
    MODEL_TENSOR.V_ENC_EMBD_PATCH,
    MODEL_TENSOR.V_ENC_EMBD_POS,
+   MODEL_TENSOR.V_ENC_INPUT_NORM,
    MODEL_TENSOR.V_ENC_ATTN_Q,
    MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
    MODEL_TENSOR.V_ENC_ATTN_K,
    MODEL_TENSOR.V_ENC_ATTN_K_NORM,
    MODEL_TENSOR.V_ENC_ATTN_V,
-   MODEL_TENSOR.V_ENC_INPUT_NORM,
-   MODEL_TENSOR.V_ENC_OUTPUT,
-   MODEL_TENSOR.V_ENC_OUTPUT_NORM,
+   MODEL_TENSOR.V_ENC_ATTN_O,
+   MODEL_TENSOR.V_ENC_ATTN_O_NORM,
+   MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
    MODEL_TENSOR.V_ENC_FFN_UP,
    MODEL_TENSOR.V_ENC_FFN_GATE,
    MODEL_TENSOR.V_ENC_FFN_DOWN,

@@ -2180,6 +2183,7 @@ class VisionProjectorType:
    GEMMA3 = "gemma3"
    IDEFICS3 = "idefics3"
    PIXTRAL = "pixtral"
+   LLAMA4 = "llama4"
    QWEN2VL = "qwen2vl_merger"
    QWEN25VL = "qwen2.5vl_merger"
    INTERNVL = "internvl"

gguf-py/gguf/tensor_mapping.py

Lines changed: 17 additions & 2 deletions
@@ -902,10 +902,12 @@ class TensorNameMap:

    MODEL_TENSOR.V_MMPROJ_FC: (
        "model.connector.modality_projection.proj", # SmolVLM
+       "multi_modal_projector.linear_1", # llama 4
    ),

    MODEL_TENSOR.V_MMPROJ_MLP: (
        "model.mm_projector.mlp.mlp.{bid}",
+       "vision_model.vision_adapter.mlp.fc{bid}", # llama 4
        "mlp1.{bid}", # InternVL
    ),

@@ -915,26 +917,30 @@ class TensorNameMap:

    MODEL_TENSOR.V_ENC_EMBD_CLS: (
        "vision_tower.vision_model.embeddings.class_embedding",
+       "vision_model.class_embedding", # llama 4
    ),

    MODEL_TENSOR.V_ENC_EMBD_PATCH: (
        "vision_tower.vision_model.embeddings.patch_embedding",
        "vpm.embeddings.patch_embedding",
        "model.vision_model.embeddings.patch_embedding", # SmolVLM
        "vision_tower.patch_conv", # pixtral
+       "vision_model.patch_embedding.linear", # llama 4
        "visual.patch_embed.proj", # qwen2vl
    ),

    MODEL_TENSOR.V_ENC_EMBD_POS: (
        "vision_tower.vision_model.embeddings.position_embedding",
        "vpm.embeddings.position_embedding",
        "model.vision_model.embeddings.position_embedding", # SmolVLM
+       "vision_model.positional_embedding_vlm", # llama 4
    ),

    MODEL_TENSOR.V_ENC_ATTN_Q: (
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
        "vpm.encoder.layers.{bid}.self_attn.q_proj",
        "model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
+       "vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
        "vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
        "visual.blocks.{bid}.attn.q", # qwen2vl, generated
    ),

@@ -947,6 +953,7 @@ class TensorNameMap:
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
        "vpm.encoder.layers.{bid}.self_attn.k_proj",
        "model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
+       "vision_model.model.layers.{bid}.self_attn.k_proj", # llama4
        "vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
        "visual.blocks.{bid}.attn.k", # qwen2vl, generated
    ),

@@ -959,6 +966,7 @@ class TensorNameMap:
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
        "vpm.encoder.layers.{bid}.self_attn.v_proj",
        "model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
+       "vision_model.model.layers.{bid}.self_attn.v_proj", # llama4
        "vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
        "visual.blocks.{bid}.attn.v", # qwen2vl, generated
    ),

@@ -969,23 +977,26 @@ class TensorNameMap:
        "vpm.encoder.layers.{bid}.layer_norm1",
        "model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
        "vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
+       "vision_model.model.layers.{bid}.input_layernorm", # llama4
        "visual.blocks.{bid}.norm1", # qwen2vl
    ),

-   MODEL_TENSOR.V_ENC_OUTPUT: (
+   MODEL_TENSOR.V_ENC_ATTN_O: (
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
        "vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
        "vpm.encoder.layers.{bid}.self_attn.out_proj",
        "model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
+       "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
        "vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
        "visual.blocks.{bid}.attn.proj", # qwen2vl
    ),

-   MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
+   MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
        "vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
        "vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
        "vpm.encoder.layers.{bid}.layer_norm2",
        "model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
+       "vision_model.model.layers.{bid}.post_attention_layernorm", # llama4
        "vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
        "visual.blocks.{bid}.norm2", # qwen2vl
    ),

@@ -995,6 +1006,7 @@ class TensorNameMap:
        "vpm.encoder.layers.{bid}.mlp.fc1",
        "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
        "vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
+       "vision_model.model.layers.{bid}.mlp.fc1", # llama4
        "visual.blocks.{bid}.mlp.fc1", # qwen2vl
        "visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
    ),

@@ -1009,6 +1021,7 @@ class TensorNameMap:
        "vpm.encoder.layers.{bid}.mlp.fc2",
        "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
        "vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
+       "vision_model.model.layers.{bid}.mlp.fc2", # llama4
        "visual.blocks.{bid}.mlp.fc2", # qwen2vl
        "visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
    ),

@@ -1024,11 +1037,13 @@ class TensorNameMap:
    MODEL_TENSOR.V_PRE_NORM: (
        "vision_tower.vision_model.pre_layrnorm",
        "vision_tower.ln_pre", # pixtral
+       "vision_model.layernorm_pre", # llama4
    ),

    MODEL_TENSOR.V_POST_NORM: (
        "vision_tower.vision_model.post_layernorm",
        "model.vision_model.post_layernorm", # SmolVLM
+       "vision_model.layernorm_post", # llama4
        "visual.merger.ln_q", # qwen2vl
    ),