
Commit 18a20cf

Merge pull request #37 from menloresearch/update-dev-from-master-2025-03-31-00-08
Sync master with upstream release b5002
2 parents 2881a0a + 2c3f8b8 commit 18a20cf


48 files changed: +1641 additions, -2790 deletions

README.md

Lines changed: 2 additions & 0 deletions

```diff
@@ -112,6 +112,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM)
 - [x] [QRWKV-6](https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1)
 - [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct)
+- [x] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview)
+- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
 
 #### Multimodal
```

ci/README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -60,7 +60,7 @@ docker run --privileged -it \
 Inside the container, execute the following commands:
 
 ```bash
-apt update -y && apt install -y bc cmake git python3.10-venv time unzip wget
+apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
 git config --global --add safe.directory /ws
 GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
 ```
````
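ccache is added to the container's package list, presumably so repeated CI builds can reuse compiler output; this pairs with the GGML_CCACHE detection logic in ggml/src/CMakeLists.txt further down.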

ci/run.sh

Lines changed: 1 addition & 1 deletion

```diff
@@ -69,7 +69,7 @@ fi
 if [ ! -z ${GG_BUILD_MUSA} ]; then
     # Use qy1 by default (MTT S80)
     MUSA_ARCH=${MUSA_ARCH:-21}
-    CMAKE_EXTRA="-DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
 fi
 
 ## helpers
```
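Appending to `${CMAKE_EXTRA}` instead of assigning it preserves any flags accumulated by the script's other GG_BUILD_* branches, so the MUSA block no longer clobbers them.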

convert_hf_to_gguf.py

Lines changed: 108 additions & 0 deletions

```diff
@@ -708,6 +708,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
             # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
             res = "superbpe"
+        if chkhsh == "1994ffd01900cfb37395608534236ecd63f2bd5995d6cb1004dda1af50240f15":
+            # ref: https://huggingface.co/trillionlabs/Trillion-7B-preview
+            res = "trillion"
+        if chkhsh == "96a5f08be6259352137b512d4157e333e21df7edd3fcd152990608735a65b224":
+            # ref: https://huggingface.co/inclusionAI/Ling-lite
+            res = "bailingmoe"
 
         if res is None:
             logger.warning("\n")
```
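For context, each chkhsh branch above dispatches on a fingerprint of the tokenizer's output for a fixed probe text. A minimal sketch of the idea, with an illustrative probe and lookup table (not the script's actual constants):

```python
# Sketch of chkhsh-style dispatch: hash the token ids a tokenizer produces
# for a fixed probe string, then pick a pre-tokenizer preset by digest.
# The token ids and preset table below are hypothetical.
from hashlib import sha256

def fingerprint(token_ids: list[int]) -> str:
    return sha256(str(token_ids).encode()).hexdigest()

PRESETS = {fingerprint([101, 2023, 2003, 102]): "example-preset"}  # hypothetical

res = PRESETS.get(fingerprint([101, 2023, 2003, 102]))
print(res)  # -> "example-preset"
```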
```diff
@@ -5130,6 +5136,108 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return super().modify_tensors(data_torch, name, bid)
 
 
+@Model.register("BailingMoeForCausalLM")
+class BailingMoeModel(Model):
+    model_arch = gguf.MODEL_ARCH.BAILINGMOE
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        if "head_dim" in hparams:
+            rope_dim = hparams["head_dim"]
+        else:
+            rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
+
+        self.gguf_writer.add_rope_dimension_count(rope_dim)
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+        self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
+        self.gguf_writer.add_expert_weights_scale(1.0)
+        self.gguf_writer.add_expert_count(hparams["num_experts"])
+        self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
+        self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    @staticmethod
+    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
+        if n_head_kv is not None and n_head != n_head_kv:
+            n_head = n_head_kv
+        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+                .swapaxes(1, 2)
+                .reshape(weights.shape))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        n_head = self.hparams["num_attention_heads"]
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        n_embd = self.hparams["hidden_size"]
+        head_dim = self.hparams.get("head_dim", n_embd // n_head)
+
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+
+        if name.endswith("attention.dense.weight"):
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid), data_torch)]
+        elif name.endswith("query_key_value.weight"):
+            q, k, v = data_torch.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2)
+
+            return [
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), BailingMoeModel.permute(q, n_head, n_head)),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), BailingMoeModel.permute(k, n_head, n_kv_head)),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v)
+            ]
+        elif name.find("mlp.experts") != -1:
+            n_experts = self.hparams["num_experts"]
+            assert bid is not None
+
+            tensors: list[tuple[str, Tensor]] = []
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+
+            return tensors
+
+        new_name = self.map_tensor_name(name)
+
+        if new_name == output_name and self.hparams.get("norm_head"):
+            data_torch = data_torch.float()
+            data_torch /= torch.norm(data_torch, p=2, dim=0, keepdim=True) + 1e-7
+
+        return [(new_name, data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @Model.register("ChameleonForConditionalGeneration")
 @Model.register("ChameleonForCausalLM")  # obsolete
 class ChameleonModel(Model):
```
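The heart of the new converter is the expert-merging branch: per-expert 2D projection weights are buffered until all of a layer's experts have arrived, then stacked into one 3D tensor per projection, which is the layout GGUF expects for MoE weights. A toy, self-contained sketch of that step (sizes and names are illustrative, not the real hparams):

```python
# Toy reproduction of the torch.stack merge in BailingMoeModel.modify_tensors:
# collect each expert's 2D weight, then stack into (n_experts, n_out, n_in).
import torch

n_experts, n_out, n_in = 3, 4, 8   # toy sizes
bid, w_name = 0, "up_proj"         # one layer, one projection, for illustration

# simulate the tensors buffered in self._experts[bid]
buffered = {
    f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight": torch.randn(n_out, n_in)
    for xid in range(n_experts)
}

datas = [buffered.pop(f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight")
         for xid in range(n_experts)]
merged = torch.stack(datas, dim=0)

assert merged.shape == (n_experts, n_out, n_in)
assert not buffered  # everything consumed, mirroring the prepare_tensors() check
```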

convert_hf_to_gguf_update.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -111,6 +111,8 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
     {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
     {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
+    {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
+    {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
 ]
```
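These two list entries drive the new chkhsh branches above: per its own workflow, convert_hf_to_gguf_update.py downloads each listed tokenizer and regenerates the checks in convert_hf_to_gguf.py's get_vocab_base_pre().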

ggml/CMakeLists.txt

Lines changed: 4 additions & 0 deletions

```diff
@@ -100,6 +100,10 @@ else()
     set(INS_ENB ON)
 endif()
 
+message(DEBUG "GGML_NATIVE         : ${GGML_NATIVE}")
+message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
+message(DEBUG "INS_ENB             : ${INS_ENB}")
+
 option(GGML_CPU_HBM      "ggml: use memkind for CPU HBM" OFF)
 option(GGML_CPU_AARCH64  "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
 option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
```
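Note that `message(DEBUG ...)` output is hidden at CMake's default log level; configure with `cmake --log-level=DEBUG` (CMake 3.15+) to see these values.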

ggml/src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -65,7 +65,7 @@ if (GGML_LTO)
     endif()
 endif()
 
-if (GGML_CCACHE)
+if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
     find_program(GGML_CCACHE_FOUND ccache)
     find_program(GGML_SCCACHE_FOUND sccache)
```
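With the extra guard, an explicitly chosen compiler launcher takes precedence: a configuration such as `cmake -B build -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache` is no longer overridden by the automatic ccache detection.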

ggml/src/ggml-cann/.clang-format

Lines changed: 0 additions & 168 deletions
This file was deleted.
