Commit b2eb613

Merge branch 'ggml-org:master' into mradermacher
2 parents: d8421a2 + 6c6e397

13 files changed: +627 additions, -212 deletions

convert_hf_to_gguf.py (82 additions, 0 deletions)

@@ -7589,6 +7589,88 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@ModelBase.register("SmallThinkerForCausalLM")
+class SmallThinkerModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.SMALLTHINKER
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (n_experts := self.hparams.get("num_experts", self.hparams.get("moe_num_primary_experts"))) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        if (n_experts_used := self.hparams.get("num_experts_per_tok", self.hparams.get("moe_num_active_primary_experts"))) is not None:
+            self.gguf_writer.add_expert_used_count(n_experts_used)
+        if (moe_intermediate_size := self.hparams.get("moe_ffn_hidden_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+            self.gguf_writer.add_feed_forward_length(moe_intermediate_size)
+            logger.info(f"gguf: expert feed forward length = {moe_intermediate_size}")
+        if self.hparams.get("moe_primary_router_apply_softmax"):
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
+        else:
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+        # YaRN is not enabled by default
+        # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+        sliding_window_layout = self.hparams.get("sliding_window_layout")
+        if sliding_window_layout:
+            for i in sliding_window_layout:
+                if i != 0:
+                    sliding_window = self.hparams.get("sliding_window_size")
+                    if sliding_window:
+                        self.gguf_writer.add_sliding_window(sliding_window)
+                    break
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # process the experts separately
+        if name.find("experts") != -1:
+            n_experts = self.hparams.get("num_experts", self.hparams.get("moe_num_primary_experts"))
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down", "gate", "up"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
 ###### CONVERSION LOGIC ######
 
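A note on the parameters above: set_gguf_parameters accepts either the generic HF MoE keys or SmallThinker's own names (num_experts vs. moe_num_primary_experts, num_experts_per_tok vs. moe_num_active_primary_experts), and it only emits add_sliding_window when some entry of sliding_window_layout is non-zero, i.e. at least one layer actually uses a sliding window. A hypothetical hparams fragment that would exercise the SmallThinker-specific branches (the key names are the ones the code reads; the values are invented for illustration):

    # Hypothetical config values, for illustration only.
    hparams = {
        "moe_num_primary_experts": 32,              # fallback for "num_experts"
        "moe_num_active_primary_experts": 4,        # fallback for "num_experts_per_tok"
        "moe_ffn_hidden_size": 768,                 # per-expert FFN width
        "moe_primary_router_apply_softmax": False,  # falsy -> SIGMOID gating func
        "sliding_window_layout": [0, 1, 0, 1],      # non-zero entry -> SWA in use
        "sliding_window_size": 4096,
    }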
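The heart of modify_tensors is the expert merge: GGUF stores MoE weights as one 3-D tensor per projection, so the converter buffers each layer's per-expert 2-D matrices in _experts and stacks them once all n_experts * 3 have arrived; prepare_tensors then raises if anything was left unmerged rather than silently writing an incomplete file. Here is a minimal standalone sketch of the stacking step, with invented shapes and a plain dict standing in for the checkpoint stream (map_tensor_name() is omitted):

    # Sketch only: dummy sizes, random weights, no GGUF writing.
    import torch

    n_experts, n_embd, n_ff, bid = 4, 8, 16, 0  # tiny sizes, layer (block) 0

    # per-expert weights, keyed the way they appear in the checkpoint
    experts = {
        f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w}.weight": (
            torch.randn(n_embd, n_ff) if w == "down" else torch.randn(n_ff, n_embd)
        )
        for xid in range(n_experts)
        for w in ("down", "gate", "up")
    }

    # stack each projection across experts into one [n_experts, rows, cols] tensor
    for w in ("down", "gate", "up"):
        merged = torch.stack(
            [experts[f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w}.weight"]
             for xid in range(n_experts)],
            dim=0,
        )
        print(w, tuple(merged.shape))  # down: (4, 8, 16); gate/up: (4, 16, 8)

Stacking on dim=0 puts the expert index first, so merged[e] slices out expert e's full weight matrix.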

ggml/src/ggml-sycl/backend.hpp (1 addition, 0 deletions)

@@ -28,6 +28,7 @@
 #include "mmvq.hpp"
 #include "norm.hpp"
 #include "outprod.hpp"
+#include "quantize.hpp"
 #include "quants.hpp"
 #include "rope.hpp"
 #include "set_rows.hpp"
