Skip to content

Commit 9950796

Browse files
authored: Merge branch 'ggml-org:master' into mradermacher
2 parents ecd2dba + 4dca015 commit 9950796

File tree

12 files changed

+15074
-4921
lines changed

12 files changed

+15074
-4921
lines changed

convert_hf_to_gguf.py

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,10 @@ def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Call
189189
return tensors
190190

191191
prefix = "model" if not self.is_mistral_format else "consolidated"
192-
part_names: list[str] = ModelBase.get_model_part_names(self.dir_model, prefix, ".safetensors")
192+
part_names: set[str] = set(ModelBase.get_model_part_names(self.dir_model, prefix, ".safetensors"))
193193
is_safetensors: bool = len(part_names) > 0
194194
if not is_safetensors:
195-
part_names = ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
195+
part_names = set(ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin"))
196196

197197
tensor_names_from_index: set[str] = set()
198198

@@ -209,6 +209,7 @@ def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Call
209209
if weight_map is None or not isinstance(weight_map, dict):
210210
raise ValueError(f"Can't load 'weight_map' from {index_name!r}")
211211
tensor_names_from_index.update(weight_map.keys())
212+
part_names |= set(weight_map.values())
212213
else:
213214
weight_map = {}
214215
else:
@@ -835,6 +836,15 @@ def set_gguf_parameters(self):
835836
self.gguf_writer.add_expert_group_used_count(n_group_used)
836837
logger.info(f"gguf: expert groups used count = {n_group_used}")
837838

839+
if (score_func := self.find_hparam(["score_function", "scoring_func", "score_func"], optional=True)) is not None:
840+
if score_func == "sigmoid":
841+
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
842+
elif score_func == "softmax":
843+
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
844+
else:
845+
raise ValueError(f"Unsupported expert score gating function value: {score_func}")
846+
logger.info(f"gguf: expert score gating function = {score_func}")
847+
838848
if (head_dim := self.hparams.get("head_dim")) is not None:
839849
self.gguf_writer.add_key_length(head_dim)
840850
self.gguf_writer.add_value_length(head_dim)
@@ -2563,15 +2573,6 @@ def set_gguf_parameters(self):
25632573
if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
25642574
self.gguf_writer.add_leading_dense_block_count(n_dense_layers)
25652575

2566-
# Expert Gating Function
2567-
score_func = self.hparams.get("score_func")
2568-
if score_func == "sigmoid":
2569-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
2570-
elif score_func == "softmax":
2571-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
2572-
elif score_func is not None:
2573-
raise ValueError(f"Unsupported score_function value: {score_func}")
2574-
25752576
# Route normalization and scaling
25762577
if (route_norm := self.hparams.get("route_norm")) is not None:
25772578
self.gguf_writer.add_expert_weights_norm(route_norm)
@@ -7192,13 +7193,6 @@ def set_gguf_parameters(self):
71927193
self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
71937194
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
71947195

7195-
if hparams["scoring_func"] == "sigmoid":
7196-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
7197-
elif hparams["scoring_func"] == "softmax":
7198-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
7199-
else:
7200-
raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")
7201-
72027196
self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
72037197

72047198
rope_scaling = self.hparams.get("rope_scaling") or {}
@@ -7304,12 +7298,6 @@ def __init__(self, *args, **kwargs):
73047298

73057299
def set_gguf_parameters(self):
73067300
super().set_gguf_parameters()
7307-
if self.hparams["scoring_func"] == "sigmoid":
7308-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
7309-
elif self.hparams["scoring_func"] == "softmax":
7310-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
7311-
else:
7312-
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
73137301

73147302
self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
73157303
self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
@@ -7402,11 +7390,6 @@ def set_gguf_parameters(self):
74027390
self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
74037391
self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
74047392

7405-
if self.hparams["scoring_func"] == "noaux_tc":
7406-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
7407-
else:
7408-
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
7409-
74107393
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
74117394
if name.endswith("e_score_correction_bias"):
74127395
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
@@ -8727,13 +8710,6 @@ def set_gguf_parameters(self):
87278710
self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
87288711
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
87298712

8730-
if hparams["score_function"] == "sigmoid":
8731-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
8732-
elif hparams["score_function"] == "softmax":
8733-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
8734-
else:
8735-
raise ValueError(f"Unsupported score_function value: {hparams['score_function']}")
8736-
87378713
if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
87388714
self.gguf_writer.add_nextn_predict_layers(nextn_layers)
87398715

docs/ops.md

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ Legend:
1414

1515
| Operation | BLAS | CANN | CPU | CUDA | Metal | OpenCL | SYCL | Vulkan | zDNN |
1616
|-----------|------|------|------|------|------|------|------|------|------|
17-
| ABS |||| 🟡 | 🟡 || 🟡 | ||
17+
| ABS |||| 🟡 | 🟡 || 🟡 | 🟡 ||
1818
| ACC ||||||||||
1919
| ADD ||||| 🟡 | 🟡 ||||
2020
| ADD1 ||||||||||
21-
| ADD_ID |||||||| ||
21+
| ADD_ID |||||||| ||
2222
| ARANGE ||||||||||
2323
| ARGMAX ||||||||||
24-
| ARGSORT |||||||| ||
24+
| ARGSORT |||||||| 🟡 ||
2525
| CEIL |||| 🟡 ||| 🟡 |||
2626
| CLAMP ||||| 🟡 | 🟡 || 🟡 ||
2727
| CONCAT |||| 🟡 || 🟡 ||||
@@ -30,7 +30,7 @@ Legend:
3030
| CONV_2D_DW ||||||||||
3131
| CONV_3D ||||||||||
3232
| CONV_TRANSPOSE_1D ||||||||||
33-
| CONV_TRANSPOSE_2D |||||||| ||
33+
| CONV_TRANSPOSE_2D |||||||| ||
3434
| COS ||||| 🟡 ||| 🟡 ||
3535
| COUNT_EQUAL ||||||||||
3636
| CPY || 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 ||
@@ -41,7 +41,7 @@ Legend:
4141
| DIV ||||| 🟡 | 🟡 ||||
4242
| DUP |||| 🟡 | 🟡 | 🟡 || 🟡 ||
4343
| ELU |||| 🟡 | 🟡 || 🟡 |||
44-
| EXP |||| 🟡 | 🟡 || 🟡 | ||
44+
| EXP |||| 🟡 | 🟡 || 🟡 | 🟡 ||
4545
| EXPM1 |||| 🟡 ||||||
4646
| FILL ||||||||||
4747
| FLASH_ATTN_EXT || 🟡 || 🟡 | 🟡 ||| 🟡 ||
@@ -57,22 +57,22 @@ Legend:
5757
| GET_ROWS_BACK ||| 🟡 | 🟡 ||||||
5858
| GROUP_NORM ||||||||||
5959
| GROUP_NORM_MUL_ADD ||||||||||
60-
| HARDSIGMOID |||| 🟡 | 🟡 || 🟡 | ||
61-
| HARDSWISH |||| 🟡 | 🟡 || 🟡 | ||
60+
| HARDSIGMOID |||| 🟡 | 🟡 || 🟡 | 🟡 ||
61+
| HARDSWISH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
6262
| IM2COL ||||| 🟡 |||||
63-
| IM2COL_3D |||||||| ||
63+
| IM2COL_3D |||||||| ||
6464
| L2_NORM ||||||||||
65-
| LEAKY_RELU |||||||| ||
65+
| LEAKY_RELU |||||||| 🟡 ||
6666
| LOG ||||||||||
67-
| MEAN |||||||| ||
67+
| MEAN |||||||| ||
6868
| MUL ||||| 🟡 | 🟡 ||||
6969
| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 |
7070
| MUL_MAT_ID || 🟡 |||| 🟡 | 🟡 |||
71-
| NEG |||| 🟡 | 🟡 || 🟡 | ||
71+
| NEG |||| 🟡 | 🟡 || 🟡 | 🟡 ||
7272
| NORM ||||| 🟡 ||| 🟡 ||
7373
| NORM_MUL_ADD ||||||||||
7474
| OPT_STEP_ADAMW ||||||||||
75-
| OPT_STEP_SGD |||||||| ||
75+
| OPT_STEP_SGD |||||||| ||
7676
| OUT_PROD | 🟡 || 🟡 | 🟡 ||| 🟡 |||
7777
| PAD |||| 🟡 ||| 🟡 |||
7878
| PAD_REFLECT_1D ||||||||||
@@ -83,7 +83,7 @@ Legend:
8383
| REPEAT_BACK ||||||||||
8484
| RMS_NORM ||||| 🟡 |||||
8585
| RMS_NORM_BACK ||||||||||
86-
| RMS_NORM_MUL_ADD |||||||| ||
86+
| RMS_NORM_MUL_ADD |||||||| ||
8787
| ROLL ||||||||||
8888
| ROPE || 🟡 ||||||||
8989
| ROPE_BACK ||||||||||
@@ -104,15 +104,15 @@ Legend:
104104
| SOFT_MAX_BACK ||| 🟡 | 🟡 ||| 🟡 |||
105105
| SOLVE_TRI ||||||||||
106106
| SQR ||||| 🟡 ||| 🟡 ||
107-
| SQRT ||||| 🟡 ||| ||
107+
| SQRT ||||| 🟡 ||| 🟡 ||
108108
| SSM_CONV ||||||||||
109-
| SSM_SCAN |||||||| ||
109+
| SSM_SCAN |||||||| 🟡 ||
110110
| STEP |||| 🟡 | 🟡 || 🟡 |||
111111
| SUB ||||| 🟡 | 🟡 ||||
112-
| SUM |||| 🟡 ||| 🟡 | ||
112+
| SUM |||| 🟡 ||| 🟡 | 🟡 ||
113113
| SUM_ROWS |||| 🟡 ||| 🟡 |||
114114
| SWIGLU ||||| 🟡 ||| 🟡 ||
115-
| SWIGLU_OAI |||||||| ||
115+
| SWIGLU_OAI |||||||| 🟡 ||
116116
| TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
117117
| TIMESTEP_EMBEDDING ||||||||||
118118
| TOPK_MOE ||||||||||

0 commit comments

Comments (0)