
Commit ea50d1d
Merge remote-tracking branch 'upstream/concedo_experimental' into croco_nex_0
2 parents 911be02 + fbf5c04


52 files changed (+2580 / -916 lines)

common/minja/minja.hpp

Lines changed: 2 additions & 2 deletions
@@ -240,7 +240,7 @@ class Value : public std::enable_shared_from_this<Value> {
             auto index = key.get<int>();
             return array_->at(index < 0 ? array_->size() + index : index);
         } else if (object_) {
-            if (!key.is_hashable()) throw std::runtime_error("Unashable type: " + dump());
+            if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
             auto it = object_->find(key.primitive_);
             if (it == object_->end()) return Value();
             return it->second;
@@ -249,7 +249,7 @@ class Value : public std::enable_shared_from_this<Value> {
     }
     void set(const Value& key, const Value& value) {
         if (!object_) throw std::runtime_error("Value is not an object: " + dump());
-        if (!key.is_hashable()) throw std::runtime_error("Unashable type: " + dump());
+        if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
         (*object_)[key.primitive_] = value;
     }
     Value call(const std::shared_ptr<Context> & context, ArgumentsValue & args) const {

convert_hf_to_gguf.py

Lines changed: 110 additions & 2 deletions
@@ -708,6 +708,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
             # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
             res = "superbpe"
+        if chkhsh == "1994ffd01900cfb37395608534236ecd63f2bd5995d6cb1004dda1af50240f15":
+            # ref: https://huggingface.co/trillionlabs/Trillion-7B-preview
+            res = "trillion"
+        if chkhsh == "96a5f08be6259352137b512d4157e333e21df7edd3fcd152990608735a65b224":
+            # ref: https://huggingface.co/inclusionAI/Ling-lite
+            res = "bailingmoe"

         if res is None:
             logger.warning("\n")
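
Note: each chkhsh branch above keys a pre-tokenizer name off a checksum of the token ids that a tokenizer produces for a fixed probe string, so checkpoints with byte-identical pre-tokenization collide on the same hash. A minimal sketch of that idea, assuming the sha256-over-stringified-ids scheme used by convert_hf_to_gguf_update.py (the real probe text lives in that script; probe_text here is a placeholder):

    from hashlib import sha256
    from transformers import AutoTokenizer

    def tokenizer_checksum(repo: str, probe_text: str) -> str:
        # Hash the token ids for a fixed probe string; tokenizers that split
        # the probe identically share a checksum, hence a pre-tokenizer type.
        tok = AutoTokenizer.from_pretrained(repo)
        return sha256(str(tok.encode(probe_text)).encode()).hexdigest()

A new family is then wired up with one more `if chkhsh == ...:` branch, as the trillion and bailingmoe entries do here.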
@@ -3551,8 +3557,8 @@ def set_gguf_parameters(self):
         head_size = hidden_size // num_attention_heads
         rms_norm_eps = self.hparams["rms_norm_eps"]
         intermediate_size = self.hparams["intermediate_size"]
-        time_mix_extra_dim = 64 if hidden_size >= 4096 else 32
-        time_decay_extra_dim = 128 if hidden_size >= 4096 else 64
+        time_mix_extra_dim = self.hparams.get("lora_rank_tokenshift", 64 if hidden_size >= 4096 else 32)
+        time_decay_extra_dim = self.hparams.get("lora_rank_decay", 128 if hidden_size >= 4096 else 64)

         # RWKV isn't context limited
         self.gguf_writer.add_context_length(1048576)
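
Note: switching to hparams.get keeps the old size heuristic as the default, so configs that predate lora_rank_tokenshift / lora_rank_decay convert exactly as before, while newer RWKV configs can pin both ranks explicitly. A quick illustration with a hypothetical config dict:

    hparams = {"hidden_size": 2560}  # older config without the new keys
    time_mix_extra_dim = hparams.get("lora_rank_tokenshift", 64 if hparams["hidden_size"] >= 4096 else 32)
    assert time_mix_extra_dim == 32  # falls back to the size-based default

    hparams["lora_rank_tokenshift"] = 96  # newer config overrides the rank
    assert hparams.get("lora_rank_tokenshift", 32) == 96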
@@ -5130,6 +5136,108 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return super().modify_tensors(data_torch, name, bid)


+@Model.register("BailingMoeForCausalLM")
+class BailingMoeModel(Model):
+    model_arch = gguf.MODEL_ARCH.BAILINGMOE
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        if hparams.get("head_dim"):
+            rope_dim = hparams["head_dim"]
+        else:
+            rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
+
+        self.gguf_writer.add_rope_dimension_count(rope_dim)
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+        self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
+        self.gguf_writer.add_expert_weights_scale(1.0)
+        self.gguf_writer.add_expert_count(hparams["num_experts"])
+        self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
+        self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    @staticmethod
+    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
+        if n_head_kv is not None and n_head != n_head_kv:
+            n_head = n_head_kv
+        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+                .swapaxes(1, 2)
+                .reshape(weights.shape))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        n_head = self.hparams["num_attention_heads"]
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        n_embd = self.hparams["hidden_size"]
+        head_dim = self.hparams.get("head_dim", n_embd // n_head)
+
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+
+        if name.endswith("attention.dense.weight"):
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid), data_torch)]
+        elif name.endswith("query_key_value.weight"):
+            q, k, v = data_torch.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2)
+
+            return [
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), BailingMoeModel.permute(q, n_head, n_head)),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), BailingMoeModel.permute(k, n_head, n_kv_head)),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v)
+            ]
+        elif name.find("mlp.experts") != -1:
+            n_experts = self.hparams["num_experts"]
+            assert bid is not None
+
+            tensors: list[tuple[str, Tensor]] = []
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+
+            return tensors
+
+        new_name = self.map_tensor_name(name)
+
+        if new_name == output_name and self.hparams.get("norm_head"):
+            data_torch = data_torch.float()
+            data_torch /= torch.norm(data_torch, p=2, dim=0, keepdim=True) + 1e-7
+
+        return [(new_name, data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @Model.register("ChameleonForConditionalGeneration")
 @Model.register("ChameleonForCausalLM") # obsolete
 class ChameleonModel(Model):
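
Two details of BailingMoeModel worth calling out. First, per-expert FFN weights are buffered in _experts until a layer has all n_experts * 3 tensors, then each projection is stacked into a single 3D tensor so experts can be indexed along dim 0. A toy sketch of that merge with made-up sizes:

    import torch

    n_expert, ffn, hidden = 8, 1408, 2048  # hypothetical shapes for illustration
    experts = [torch.randn(ffn, hidden) for _ in range(n_expert)]
    merged = torch.stack(experts, dim=0)  # -> [n_expert, ffn, hidden]
    assert merged.shape == (n_expert, ffn, hidden)

Second, when the config sets norm_head, the output head is L2-normalized column-wise at conversion time (dividing by torch.norm(..., dim=0) plus a small epsilon), baking the normalization into the stored weights instead of applying it at inference.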

convert_hf_to_gguf_update.py

Lines changed: 2 additions & 0 deletions
@@ -111,6 +111,8 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
     {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
     {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
+    {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
+    {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
 ]

examples/llava/clip.cpp

Lines changed: 5 additions & 3 deletions
@@ -1517,14 +1517,16 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p
     const int n_kv = gguf_get_n_kv(ctx);
     const int ftype = get_u32(ctx, KEY_FTYPE);
     const std::string ftype_str = get_ftype(ftype);
-    const int idx_desc = get_key_idx(ctx, KEY_DESCRIPTION);
-    const std::string description = gguf_get_val_str(ctx, idx_desc);
     const int idx_name = gguf_find_key(ctx, KEY_NAME);
     if (idx_name != -1) { // make name optional temporarily as some of the uploaded models missing it due to a bug
         const std::string name = gguf_get_val_str(ctx, idx_name);
         LOG_INF("%s: model name: %s\n", __func__, name.c_str());
     }
-    LOG_INF("%s: description: %s\n", __func__, description.c_str());
+    const int idx_desc = gguf_find_key(ctx, KEY_DESCRIPTION);
+    if (idx_desc != -1) { // ditto
+        const std::string description = gguf_get_val_str(ctx, idx_desc);
+        LOG_INF("%s: description: %s\n", __func__, description.c_str());
+    }
     LOG_INF("%s: GGUF version: %d\n", __func__, gguf_get_version(ctx));
     LOG_INF("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
     LOG_INF("%s: n_tensors: %d\n", __func__, n_tensors);
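
The fix mirrors the earlier KEY_NAME handling: gguf_find_key returns -1 for a missing key, so the description entry becomes optional instead of get_key_idx aborting on models that lack it. For reference, a rough Python equivalent of the same optional-metadata check, assuming the gguf package's GGUFReader API (the file path and string-decoding idiom here are illustrative, not taken from this commit):

    from gguf import GGUFReader

    reader = GGUFReader("model.gguf")  # hypothetical file
    field = reader.fields.get("general.description")
    if field is not None:  # ditto: skip quietly when the key is absent
        desc = field.parts[field.data[-1]].tobytes().decode("utf-8")
        print("description:", desc)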

examples/llava/qwen2_vl_surgery.py

Lines changed: 104 additions & 64 deletions
@@ -5,10 +5,12 @@
 import numpy as np
 from gguf import *
 from transformers import (
+    AutoProcessor,
     Qwen2VLForConditionalGeneration,
+    Qwen2_5_VLForConditionalGeneration,
     Qwen2VLProcessor,
-    AutoProcessor,
-    Qwen2VLConfig
+    Qwen2VLConfig,
+    Qwen2_5_VLConfig,
 )


@@ -18,62 +20,80 @@
 def k(raw_key: str, arch: str) -> str:
     return raw_key.format(arch=arch)

+class VL2:
+
+    @staticmethod
+    def to_gguf_name(name: str) -> str:
+        og = name
+        name = name.replace("text_model", "t").replace("vision_model", "v")
+        name = name.replace("blocks", "blk").replace("embeddings.", "")
+        name = name.replace("attn.", "attn_")
+        name = name.replace("mlp.fc1", "ffn_down").replace("mlp.fc2", "ffn_up").replace("proj.", "out.")
+        # name = name.replace("layrnorm", "ln").replace("layer_norm", "ln").replace("layernorm", "ln")
+        name = name.replace("norm1", "ln1").replace("norm2", "ln2")
+        name = name.replace("merger.mlp", 'mm')
+        print(f"[to_gguf_name] {og} --> {name}")
+        return name
+
+    @classmethod
+    def find_vision_tensors(cls, qwen2vl, dtype) -> Dict[str, np.ndarray]:
+        vision_model = qwen2vl.visual
+        tensor_map = {}
+        for name, ten in vision_model.state_dict().items():
+            ten = ten.numpy()
+            if 'qkv' in name:
+                if ten.ndim == 2: # weight
+                    c3, _ = ten.shape
+                else: # bias
+                    c3 = ten.shape[0]
+                assert c3 % 3 == 0
+                c = c3 // 3
+                wq = ten[:c]
+                wk = ten[c: c * 2]
+                wv = ten[c * 2:]
+                tensor_map[cls.to_gguf_name(f"vision_model.{name}").replace("qkv", "q")] = wq
+                tensor_map[cls.to_gguf_name(f"vision_model.{name}").replace("qkv", "k")] = wk
+                tensor_map[cls.to_gguf_name(f"vision_model.{name}").replace("qkv", "v")] = wv
+            elif 'merger' in name:
+                if name.endswith("ln_q.weight"):
+                    tensor_map['v.post_ln.weight'] = ten
+                elif name.endswith("ln_q.bias"):
+                    tensor_map['v.post_ln.bias'] = ten
+                else:
+                    # "merger.mlp.%d.weight/bias" --> "mm.%d.weight/bias"
+                    tensor_map[cls.to_gguf_name(name)] = ten
+            elif 'patch_embed.proj.weight' in name:
+                # NOTE: split Conv3D into Conv2Ds
+                c1, c2, kt, kh, kw = ten.shape
+                assert kt == 2, "Current implmentation only support temporal_patch_size of 2"
+                tensor_map["v.patch_embd.weight"] = ten[:, :, 0, ...]
+                tensor_map["v.patch_embd.weight.1"] = ten[:, :, 1, ...]
+            else:
+                tensor_map[cls.to_gguf_name(f"vision_model.{name}")] = ten

-def to_gguf_name(name: str) -> str:
-    og = name
-    name = name.replace("text_model", "t").replace("vision_model", "v")
-    name = name.replace("blocks", "blk").replace("embeddings.", "")
-    name = name.replace("attn.", "attn_")
-    name = name.replace("mlp.fc1", "ffn_down").replace("mlp.fc2", "ffn_up").replace("proj.", "out.")
-    # name = name.replace("layrnorm", "ln").replace("layer_norm", "ln").replace("layernorm", "ln")
-    name = name.replace("norm1", "ln1").replace("norm2", "ln2")
-    name = name.replace("merger.mlp", 'mm')
-    print(f"[to_gguf_name] {og} --> {name}")
-    return name
-
-
-def find_vision_tensors(qwen2vl, dtype) -> Dict[str, np.ndarray]:
-    vision_model = qwen2vl.visual
-    tensor_map = {}
-    for name, ten in vision_model.state_dict().items():
-        ten = ten.numpy()
-        if 'qkv' in name:
-            if ten.ndim == 2: # weight
-                c3, _ = ten.shape
-            else: # bias
-                c3 = ten.shape[0]
-            assert c3 % 3 == 0
-            c = c3 // 3
-            wq = ten[:c]
-            wk = ten[c: c * 2]
-            wv = ten[c * 2:]
-            tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "q")] = wq
-            tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "k")] = wk
-            tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "v")] = wv
-        elif 'merger' in name:
-            if name.endswith("ln_q.weight"):
-                tensor_map['v.post_ln.weight'] = ten
-            elif name.endswith("ln_q.bias"):
-                tensor_map['v.post_ln.bias'] = ten
+        for new_name, ten in tensor_map.items():
+            if ten.ndim <= 1 or new_name.endswith("_norm.weight"):
+                tensor_map[new_name] = ten.astype(np.float32)
             else:
-                # "merger.mlp.%d.weight/bias" --> "mm.%d.weight/bias"
-                tensor_map[to_gguf_name(name)] = ten
-        elif 'patch_embed.proj.weight' in name:
-            # NOTE: split Conv3D into Conv2Ds
-            c1, c2, kt, kh, kw = ten.shape
-            assert kt == 2, "Current implmentation only support temporal_patch_size of 2"
-            tensor_map["v.patch_embd.weight"] = ten[:, :, 0, ...]
-            tensor_map["v.patch_embd.weight.1"] = ten[:, :, 1, ...]
-        else:
-            tensor_map[to_gguf_name(f"vision_model.{name}")] = ten
-
-    for new_name, ten in tensor_map.items():
-        if ten.ndim <= 1 or new_name.endswith("_norm.weight"):
-            tensor_map[new_name] = ten.astype(np.float32)
-        else:
-            tensor_map[new_name] = ten.astype(dtype)
-    tensor_map["v.position_embd.weight"] = np.zeros([10, 10], dtype=np.float32) # dummy tensor, just here as a placeholder
-    return tensor_map
+                tensor_map[new_name] = ten.astype(dtype)
+        tensor_map["v.position_embd.weight"] = np.zeros([10, 10], dtype=np.float32) # dummy tensor, just here as a placeholder
+        return tensor_map
+
+
+class VL25(VL2):
+
+    @staticmethod
+    def to_gguf_name(name: str) -> str:
+        og = name
+        name = name.replace("text_model", "t").replace("vision_model", "v")
+        name = name.replace("blocks", "blk").replace("embeddings.", "")
+        name = name.replace("attn.", "attn_")
+        name = name.replace("mlp.down_proj", "ffn_down").replace("mlp.up_proj", "ffn_up")
+        name = name.replace("mlp.gate_proj", "ffn_gate").replace("proj.", "out.")
+        name = name.replace("norm1", "ln1").replace("norm2", "ln2")
+        name = name.replace("merger.mlp", 'mm')
+        print(f"[vl25][to_gguf_name] {og} --> {name}")
+        return name


 def main(args):
@@ -92,11 +112,18 @@ def main(args):
     model_path = ""
     model_name = args.model_name
     print("model_name: ", model_name)
-    qwen2vl = Qwen2VLForConditionalGeneration.from_pretrained(
-        model_name, torch_dtype=dtype, device_map="cpu"
-    )
-    cfg: Qwen2VLConfig = qwen2vl.config # type: ignore[reportAssignmentType]
-    vcfg = cfg.vision_config
+    if args.model_type == "qwen2vl":
+        qwen2vl = Qwen2VLForConditionalGeneration.from_pretrained(
+            model_name, torch_dtype=dtype, device_map="cpu"
+        )
+        cfg: Qwen2VLConfig = qwen2vl.config # type: ignore[reportAssignmentType]
+        vcfg = cfg.vision_config
+    else:
+        qwen2vl = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            model_name, torch_dtype=dtype, device_map="cpu"
+        )
+        cfg: Qwen2_5_VLConfig = qwen2vl.config # type: ignore[reportAssignmentType]
+        vcfg = cfg.vision_config

     if os.path.isdir(model_name):
         local_model = True
@@ -125,14 +152,26 @@ def main(args):
     else:
         raise ValueError()

-    tensor_map = find_vision_tensors(qwen2vl, np_dtype)
+    if args.model_type == "qwen2.5vl":
+        fout.add_bool("clip.use_glu_mlp", True) # gate linear unit MLP layer in vision model
+        fout.add_bool("clip.use_rms_norm", True)
+        fout.add_array("clip.vision.fullatt_block_indexes", vcfg.fullatt_block_indexes)
+        fout.add_uint32("clip.vision.window_size", vcfg.window_size)
+        fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), vcfg.hidden_size)
+        fout.add_uint32("clip.vision.projection_dim", vcfg.out_hidden_size)
+    else:
+        fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), vcfg.embed_dim)
+        fout.add_uint32("clip.vision.projection_dim", vcfg.hidden_size)
+
+    if args.model_type == "qwen2.5vl":
+        tensor_map = VL25.find_vision_tensors(qwen2vl, np_dtype)
+    else:
+        tensor_map = VL2.find_vision_tensors(qwen2vl, np_dtype)
     for name, data in tensor_map.items():
         fout.add_tensor(name, data)

     fout.add_uint32("clip.vision.patch_size", vcfg.patch_size)
     fout.add_uint32("clip.vision.image_size", 14 * 40) # some reasonable size that is divable by (14*2)
-    fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), vcfg.embed_dim)
-    fout.add_uint32("clip.vision.projection_dim", vcfg.hidden_size)
     fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), vcfg.num_heads)
     fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
     fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), vcfg.depth)
@@ -160,6 +199,7 @@ def main(args):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("model_name", nargs='?', default="Qwen/Qwen2-VL-2B-Instruct")
+    parser.add_argument("--model_type", nargs='?', choices=['qwen2vl', 'qwen2.5vl'], default="qwen2vl")
     parser.add_argument("--data_type", nargs='?', choices=['fp32', 'fp16'], default="fp32")
     args = parser.parse_args()
     main(args)
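
With the new --model_type switch, Qwen2.5-VL checkpoints go through VL25, which maps the GLU MLP names (gate/up/down projections) instead of the fc1/fc2 pair and writes the extra window-attention metadata. The fused-qkv handling is unchanged: one [3*c, hidden] tensor is sliced into equal thirds. A toy check of that split with made-up sizes:

    import numpy as np

    c, hidden = 4, 6  # hypothetical per-projection rows and width
    qkv = np.arange(3 * c * hidden, dtype=np.float32).reshape(3 * c, hidden)
    wq, wk, wv = qkv[:c], qkv[c:2 * c], qkv[2 * c:]
    assert wq.shape == wk.shape == wv.shape == (c, hidden)

Invocation follows the existing positional-plus-flags pattern, e.g. python qwen2_vl_surgery.py <model_name> --model_type qwen2.5vl --data_type fp16.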
