
Commit a5edd7b

Not ready yet
1 parent f9a31ee commit a5edd7b

7 files changed: +248 −4 lines changed

convert_hf_to_gguf.py

Lines changed: 79 additions & 3 deletions
@@ -11,6 +11,7 @@
 import os
 import re
 import sys
+from functools import reduce
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
@@ -1246,7 +1247,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_embedding_length(self.find_vparam(["hidden_size"]))
         self.gguf_writer.add_vision_feed_forward_length(self.find_vparam(["intermediate_size"]))
         self.gguf_writer.add_vision_block_count(self.find_vparam(self.n_block_keys))
-        self.gguf_writer.add_vision_head_count(self.find_vparam(["num_attention_heads"]))
+        self.gguf_writer.add_vision_head_count(self.find_vparam(["num_attention_heads", "num_heads"]))
 
         # preprocessor config
         self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"])
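The new `"num_heads"` fallback accommodates vision configs (such as Ernie 4.5 VL's) that use a different key for the attention-head count. A minimal stand-in for `find_vparam`'s first-match-wins lookup, with a hypothetical config:

```python
vision_config = {"hidden_size": 1280, "num_heads": 16}  # hypothetical values

def find_vparam(keys: list[str]) -> int:
    # Return the first candidate key present in the vision config.
    for key in keys:
        if key in vision_config:
            return vision_config[key]
    raise KeyError(f"none of {keys} found in vision config")

print(find_vparam(["num_attention_heads", "num_heads"]))  # 16
```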
@@ -2895,14 +2896,15 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
-@ModelBase.register("Ernie4_5_MoeForCausalLM")
+@ModelBase.register("Ernie4_5_MoeForCausalLM", "Ernie4_5_VLMoeForConditionalGeneration")
 class Ernie4_5MoeModel(Ernie4_5Model):
     model_arch = gguf.MODEL_ARCH.ERNIE4_5_MOE
     _experts: list[dict[str, Tensor]] | None = None
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._experts = [{} for _ in range(self.block_count)]
+        self.split_cache = {}
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
@@ -2918,6 +2920,18 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.endswith((".weight_1", ".bias_1")):
+            self.split_cache[name] = data_torch
+            return []
+
+        part1_name = name + "_1"
+        if part1_name in self.split_cache:
+            part1_tensor = self.split_cache.pop(part1_name)
+            dim = 0
+            if 'down' in name or 'proj' in name and 'up' not in name and 'gate' not in name:
+                dim = 1
+            data_torch = torch.cat((data_torch, part1_tensor), dim=dim)
+
         # Modify correction bias name as in DeepseekV2
         if name.endswith("e_score_correction_bias"):
             name = name.replace("e_score_correction_bias", "e_score_correction.bias")
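The hunk above merges checkpoint tensors that Ernie 4.5 VL stores in two parts, a base name plus a `_1` suffix: the second part is cached until its counterpart arrives, then the two halves are concatenated. A self-contained sketch of the same logic (tensor names and shapes are hypothetical); note that Python's operator precedence makes the converter's condition read as `'down' in name or ('proj' in name and 'up' not in name and 'gate' not in name)`, so down/output projections concatenate along dim 1 (input features) and gate/up projections along dim 0:

```python
import torch

split_cache: dict[str, torch.Tensor] = {}

def merge_split_tensor(name: str, tensor: torch.Tensor) -> torch.Tensor | None:
    # Stash the "<name>_1" half until the matching "<name>" half arrives.
    if name.endswith((".weight_1", ".bias_1")):
        split_cache[name] = tensor
        return None
    part1 = split_cache.pop(name + "_1", None)
    if part1 is None:
        return tensor  # not a split tensor
    # Same condition as the converter, parenthesized per Python precedence.
    dim = 1 if ('down' in name or ('proj' in name and 'up' not in name and 'gate' not in name)) else 0
    return torch.cat((tensor, part1), dim=dim)

merge_split_tensor("blk.0.ffn_down.weight_1", torch.zeros(16, 8))      # cached -> None
out = merge_split_tensor("blk.0.ffn_down.weight", torch.zeros(16, 8))  # merged
print(out.shape)  # torch.Size([16, 16]) — concatenated along dim 1
```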
@@ -2949,7 +2963,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
                 self._experts = [{} for _ in range(self.block_count)]
 
             self._experts[bid][name] = data_torch
-
+            n_experts_val = self.hparams["moe_num_experts"]
+            n_experts = reduce(lambda x, y: x + y, n_experts_val, 0) if isinstance(n_experts_val, list) else n_experts_val
             if len(self._experts[bid]) >= n_experts * 3:
                 tensors: list[tuple[str, Tensor]] = []
 
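The `reduce` above exists because `moe_num_experts` may be a plain int or, for the VL checkpoints, a per-modality list that must be summed before comparing against the collected expert count. A quick illustration (the `[64, 64]` value is a hypothetical text/vision split); the built-in `sum()` would behave identically for the list case:

```python
from functools import reduce

for n_experts_val in (64, [64, 64]):
    # Sum a per-modality list; pass an int through unchanged.
    n_experts = reduce(lambda x, y: x + y, n_experts_val, 0) if isinstance(n_experts_val, list) else n_experts_val
    print(n_experts)  # prints 64, then 128
```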
@@ -3012,6 +3027,67 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             # skip multimodal tensors
             return []
         return [(self.map_tensor_name(name), data_torch)]
+@ModelBase.register("Ernie4_5_VLMoeForConditionalGeneration")
+class Ernie45VLModel(MmprojModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.model_arch = gguf.MODEL_ARCH.ERNIE4_5_VL_MOE
+        if self.hparams_vision is not None and "image_size" not in self.hparams_vision:
+            if "size" in self.preprocessor_config and "height" in self.preprocessor_config["size"]:
+                self.hparams_vision["image_size"] = self.preprocessor_config["size"]["height"]
+            elif "crop_size" in self.preprocessor_config and "height" in self.preprocessor_config["crop_size"]:
+                self.hparams_vision["image_size"] = self.preprocessor_config["crop_size"]["height"]
+        if self.hparams_vision is not None and "intermediate_size" not in self.hparams_vision:
+            self.hparams_vision["intermediate_size"] = self.hparams_vision["hidden_size"] * self.hparams_vision["mlp_ratio"]
+        if self.hparams_vision is not None and "num_hidden_layers" not in self.hparams_vision and "num_layers" not in self.hparams_vision:
+            # FIXME: This is a placeholder calculation.
+            # The actual value may need to be derived differently.
+            self.hparams_vision["num_hidden_layers"] = 32
+
+    def set_gguf_parameters(self):
+        # super().set_gguf_parameters() # don't call parent
+        vision_config = self.hparams_vision
+        assert vision_config is not None
+        self.gguf_writer.add_vision_embedding_length(vision_config["hidden_size"])
+        self.gguf_writer.add_vision_feed_forward_length(vision_config["intermediate_size"])
+        if (block_count := vision_config.get("num_hidden_layers", vision_config.get("num_layers"))) is None:
+            raise KeyError("Could not find num_hidden_layers or num_layers in vision config")
+        self.gguf_writer.add_vision_block_count(block_count)
+        if (head_count := vision_config.get("num_attention_heads", vision_config.get("num_heads"))) is None:
+            raise KeyError("Could not find num_attention_heads or num_heads in vision config")
+        self.gguf_writer.add_vision_head_count(head_count)
+        self.gguf_writer.add_vision_image_size(vision_config["image_size"])
+        self.gguf_writer.add_vision_patch_size(vision_config["patch_size"])
+        self.gguf_writer.add_vision_projection_dim(self.hparams["hidden_size"])
+        self.gguf_writer.add_clip_projector_type("mlp")
+        if "spatial_conv_size" in self.hparams:
+            self.gguf_writer.add_vision_spatial_merge_size(self.hparams["spatial_conv_size"])
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid
+        if name.startswith("vision_model."):
+            if ".attn.qkv." in name:
+                if data_torch.ndim == 2: # weight
+                    c3, _ = data_torch.shape
+                else: # bias
+                    c3 = data_torch.shape[0]
+                assert c3 % 3 == 0
+                c = c3 // 3
+                wq = data_torch[:c]
+                wk = data_torch[c: c * 2]
+                wv = data_torch[c * 2:]
+                yield from [
+                    (self.map_tensor_name(name.replace("qkv", "q")), wq),
+                    (self.map_tensor_name(name.replace("qkv", "k")), wk),
+                    (self.map_tensor_name(name.replace("qkv", "v")), wv),
+                ]
+                return
+            if "mm_resampler" in name:
+                name = name.replace("mm_resampler", "resampler")
+            yield self.map_tensor_name(name), data_torch
+        else:
+            # This is a projector model, so we skip the text model tensors.
+            return
 
 
 @ModelBase.register("Qwen2VLModel", "Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")

gguf-py/gguf/constants.py

Lines changed: 65 additions & 0 deletions
@@ -365,6 +365,7 @@ class MODEL_ARCH(IntEnum):
     ARCEE = auto()
     ERNIE4_5 = auto()
     ERNIE4_5_MOE = auto()
+    ERNIE4_5_VL_MOE = auto()
     HUNYUAN_MOE = auto()
     SMOLLM3 = auto()
     LFM2 = auto()
@@ -583,6 +584,7 @@ class MODEL_TENSOR(IntEnum):
     V_RESMPL_QUERY = auto() # minicpmv
     V_TOK_EMBD_IMG_BREAK = auto() # pixtral
     V_MM_PATCH_MERGER = auto() # mistral small 3.1
+
     # audio (mtmd)
     A_ENC_EMBD_POS = auto()
     A_ENC_CONV1D = auto()
@@ -682,6 +684,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.ARCEE: "arcee",
     MODEL_ARCH.ERNIE4_5: "ernie4_5",
     MODEL_ARCH.ERNIE4_5_MOE: "ernie4_5-moe",
+    MODEL_ARCH.ERNIE4_5_VL_MOE: "ernie4_5-vl-moe",
     MODEL_ARCH.FALCON_H1: "falcon-h1",
     MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
     MODEL_ARCH.SMOLLM3: "smollm3",
@@ -901,6 +904,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
     MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
     MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
+
     # audio (mtmd)
     MODEL_TENSOR.A_ENC_EMBD_POS: "a.position_embd",
     MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
@@ -2046,6 +2050,67 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP_SHEXP,
         MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
+    MODEL_ARCH.ERNIE4_5_VL_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
+        MODEL_TENSOR.V_MMPROJ,
+        MODEL_TENSOR.V_MMPROJ_FC,
+        MODEL_TENSOR.V_MMPROJ_MLP,
+        MODEL_TENSOR.V_MMPROJ_PEG,
+        MODEL_TENSOR.V_ENC_EMBD_CLS,
+        MODEL_TENSOR.V_ENC_EMBD_PATCH,
+        MODEL_TENSOR.V_ENC_EMBD_POS,
+        MODEL_TENSOR.V_ENC_INPUT_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_Q,
+        MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_K,
+        MODEL_TENSOR.V_ENC_ATTN_K_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_V,
+        MODEL_TENSOR.V_ENC_ATTN_O,
+        MODEL_TENSOR.V_ENC_ATTN_O_NORM,
+        MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
+        MODEL_TENSOR.V_ENC_FFN_UP,
+        MODEL_TENSOR.V_ENC_FFN_GATE,
+        MODEL_TENSOR.V_ENC_FFN_DOWN,
+        MODEL_TENSOR.V_LAYER_SCALE_1,
+        MODEL_TENSOR.V_LAYER_SCALE_2,
+        MODEL_TENSOR.V_PRE_NORM,
+        MODEL_TENSOR.V_POST_NORM,
+        MODEL_TENSOR.V_MM_INP_PROJ,
+        MODEL_TENSOR.V_MM_INP_NORM,
+        MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
+        MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
+        MODEL_TENSOR.V_RESMPL_ATTN_Q,
+        MODEL_TENSOR.V_RESMPL_ATTN_K,
+        MODEL_TENSOR.V_RESMPL_ATTN_V,
+        MODEL_TENSOR.V_RESMPL_ATTN_OUT,
+        MODEL_TENSOR.V_RESMPL_KV,
+        MODEL_TENSOR.V_RESMPL_KV_NORM,
+        MODEL_TENSOR.V_RESMPL_POST_NORM,
+        MODEL_TENSOR.V_RESMPL_Q_NORM,
+        MODEL_TENSOR.V_RESMPL_PROJ,
+        MODEL_TENSOR.V_RESMPL_QUERY,
+        MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
+        MODEL_TENSOR.V_MM_PATCH_MERGER,
+    ],
     MODEL_ARCH.PLM: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT,

gguf-py/gguf/tensor_mapping.py

Lines changed: 10 additions & 0 deletions
@@ -1087,6 +1087,7 @@ class TensorNameMap:
             "vision_tower.patch_conv", # pixtral
             "vision_model.patch_embedding.linear", # llama 4
             "visual.patch_embed.proj", # qwen2vl
+            "vision_model.patch_embed.proj", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_POS: (
@@ -1103,6 +1104,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
             "visual.blocks.{bid}.attn.q", # qwen2vl, generated
+            "vision_model.blocks.{bid}.attn.q", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
@@ -1116,6 +1118,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.self_attn.k_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
             "visual.blocks.{bid}.attn.k", # qwen2vl, generated
+            "vision_model.blocks.{bid}.attn.k", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
@@ -1129,6 +1132,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.self_attn.v_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
             "visual.blocks.{bid}.attn.v", # qwen2vl, generated
+            "vision_model.blocks.{bid}.attn.v", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_INPUT_NORM: (
@@ -1139,6 +1143,7 @@ class TensorNameMap:
             "vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
             "vision_model.model.layers.{bid}.input_layernorm", # llama4
             "visual.blocks.{bid}.norm1", # qwen2vl
+            "vision_model.blocks.{bid}.norm1", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_O: (
@@ -1149,6 +1154,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
             "visual.blocks.{bid}.attn.proj", # qwen2vl
+            "vision_model.blocks.{bid}.attn.proj", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
@@ -1159,6 +1165,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.post_attention_layernorm", # llama4
             "vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
             "visual.blocks.{bid}.norm2", # qwen2vl
+            "vision_model.blocks.{bid}.norm2", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_FFN_UP: (
@@ -1169,6 +1176,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.mlp.fc1", # llama4
             "visual.blocks.{bid}.mlp.fc1", # qwen2vl
             "visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
+            "vision_model.blocks.{bid}.mlp.fc1", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_ENC_FFN_GATE: (
@@ -1184,6 +1192,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.mlp.fc2", # llama4
             "visual.blocks.{bid}.mlp.fc2", # qwen2vl
             "visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
+            "vision_model.blocks.{bid}.mlp.fc2", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_LAYER_SCALE_1: (
@@ -1205,6 +1214,7 @@ class TensorNameMap:
             "model.vision_model.post_layernorm", # SmolVLM
             "vision_model.layernorm_post", # llama4
             "visual.merger.ln_q", # qwen2vl
+            "vision_model.ln", # ernie4.5-vl
         ),
 
         MODEL_TENSOR.V_MM_INP_PROJ: (
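Each source-name pattern added above is keyed by block index via the `{bid}` placeholder. In miniature, the lookup works roughly like this (a simplified illustration, not the gguf-py implementation; `v.blk.{bid}.attn_q` is the GGUF-side name for `V_ENC_ATTN_Q`):

```python
GGUF_NAME = "v.blk.{bid}.attn_q"
SOURCES = (
    "visual.blocks.{bid}.attn.q",        # qwen2vl
    "vision_model.blocks.{bid}.attn.q",  # ernie4.5-vl (added in this commit)
)

def map_name(name: str, n_blocks: int) -> str | None:
    # Expand the {bid} placeholder per layer and match against the source name.
    for bid in range(n_blocks):
        for src in SOURCES:
            if name == src.format(bid=bid):
                return GGUF_NAME.format(bid=bid)
    return None

print(map_name("vision_model.blocks.3.attn.q", 32))  # v.blk.3.attn_q
```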

src/llama-arch.h

Lines changed: 1 addition & 0 deletions
@@ -87,6 +87,7 @@ enum llm_arch {
     LLM_ARCH_ARCEE,
     LLM_ARCH_ERNIE4_5,
     LLM_ARCH_ERNIE4_5_MOE,
+    LLM_ARCH_ERNIE4_5_VL_MOE,
     LLM_ARCH_HUNYUAN_MOE,
     LLM_ARCH_SMOLLM3,
     LLM_ARCH_LFM2,

src/llama-model.cpp

Lines changed: 3 additions & 1 deletion
@@ -1652,9 +1652,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_ERNIE4_5:
         case LLM_ARCH_ERNIE4_5_MOE:
+        case LLM_ARCH_ERNIE4_5_VL_MOE:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
-                if (arch == LLM_ARCH_ERNIE4_5_MOE) {
+                if (arch == LLM_ARCH_ERNIE4_5_MOE || arch == LLM_ARCH_ERNIE4_5_VL_MOE) {
                     ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                     ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, false);
                     ml.get_key(LLM_KV_INTERLEAVE_MOE_LAYER_STEP, hparams.n_moe_layer_step);
@@ -17394,6 +17395,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_ARCEE:
         case LLM_ARCH_ERNIE4_5:
         case LLM_ARCH_ERNIE4_5_MOE:
+        case LLM_ARCH_ERNIE4_5_VL_MOE:
             return LLAMA_ROPE_TYPE_NORM;
 
         // the pairs of head values are offset by n_rot/2

tools/mtmd/clip-impl.h

Lines changed: 19 additions & 0 deletions
@@ -45,6 +45,12 @@
 #define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
 #define KEY_MINICPMV_VERSION "clip.minicpmv_version"
 
+// ernie4.5-vl specific
+#define KEY_IN_DIM             "clip.vision.in_dim"
+#define KEY_OUT_DIM            "clip.vision.out_dim"
+#define KEY_SPATIAL_CONV_SIZE  "clip.vision.spatial_conv_size"
+#define KEY_TEMPORAL_CONV_SIZE "clip.vision.temporal_conv_size"
+
 // audio-specific
 #define KEY_A_NUM_MEL_BINS "clip.audio.num_mel_bins"
 #define KEY_A_PROJ_STACK_FACTOR "clip.audio.projector.stack_factor"
@@ -111,6 +117,17 @@
 #define TN_MM_NORM_PRE "mm.a.norm_pre.%s"
 #define TN_MM_NORM_MID "mm.a.norm_mid.%s"
 
+// ernie4.5-vl
+#define TN_MM_RESAMPLER_IN_PROJ_W       "mm.resampler.in_proj.weight"
+#define TN_MM_RESAMPLER_IN_PROJ_B       "mm.resampler.in_proj.bias"
+#define TN_MM_RESAMPLER_OUT_PROJ_W      "mm.resampler.out_proj.weight"
+#define TN_MM_RESAMPLER_OUT_PROJ_B      "mm.resampler.out_proj.bias"
+#define TN_MM_RESAMPLER_POS_EMB         "mm.resampler.pos_emb"
+#define TN_MM_RESAMPLER_SPATIAL_CONV_W  "mm.resampler.spatial_conv.weight"
+#define TN_MM_RESAMPLER_SPATIAL_CONV_B  "mm.resampler.spatial_conv.bias"
+#define TN_MM_RESAMPLER_TEMPORAL_CONV_W "mm.resampler.temporal_conv.weight"
+#define TN_MM_RESAMPLER_TEMPORAL_CONV_B "mm.resampler.temporal_conv.bias"
+
 // align x to upper multiple of n
 #define CLIP_ALIGN(x, n) ((((x) + (n) - 1) / (n)) * (n))
 
@@ -131,6 +148,7 @@ enum projector_type {
     PROJECTOR_TYPE_LLAMA4,
     PROJECTOR_TYPE_QWEN2A,
     PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx
+    PROJECTOR_TYPE_ERNIE45_VL_RESAMPLER,
     PROJECTOR_TYPE_UNKNOWN,
 };
 
@@ -150,6 +168,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
     { PROJECTOR_TYPE_LLAMA4,  "llama4"},
     { PROJECTOR_TYPE_QWEN2A,  "qwen2a"},
     { PROJECTOR_TYPE_QWEN25O, "qwen2.5o"},
+    { PROJECTOR_TYPE_ERNIE45_VL_RESAMPLER, "ernie45_vl_resampler"},
 };
 
 static projector_type clip_projector_type_from_string(const std::string & str) {
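The new `clip.vision.*` keys above end up as KV metadata in the converted mmproj file. A hedged sketch of inspecting them with gguf-py's reader (the file name is hypothetical, and `ReaderField.contents()` assumes a recent gguf-py release):

```python
from gguf import GGUFReader

reader = GGUFReader("mmproj-ernie4_5-vl.gguf")  # hypothetical output of the converter
for key in ("clip.vision.spatial_conv_size", "clip.vision.temporal_conv_size"):
    field = reader.get_field(key)  # returns None if the key is absent
    print(key, "->", field.contents() if field is not None else "missing")
```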
