
Commit a2fb081

Merge branch 'ggml-org:master' into master
2 parents: a3fb31b + bea0452


111 files changed (+13954, −13592 lines)

convert_hf_to_gguf.py

Lines changed: 61 additions & 0 deletions
@@ -1054,6 +1054,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "53e325976a6e142379c19b09afcae354f2f496f147afa8f9e189a33fe4e3024e":
             # ref: https://huggingface.co/ibm-granite/granite-docling-258M
             res = "granite-docling"
+        if chkhsh == "f4f37b6c8eb9ea29b3eac6bb8c8487c5ab7885f8d8022e67edc1c68ce8403e95":
+            # ref: https://huggingface.co/MiniMaxAI/MiniMax-M2
+            res = "minimax-m2"
 
         if res is None:
             logger.warning("\n")
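For context (not part of this diff): as I read get_vocab_base_pre, chkhsh is the SHA-256 of the stringified token-ID list that the tokenizer produces for a fixed probe string, so a new entry like the one above can be reproduced along these lines. A minimal sketch, assuming the MiniMax-M2 repo is reachable; the probe text below is a placeholder, not the script's real chktxt:

# Sketch of how a chkhsh value is derived (assumption: this mirrors the
# sha256-over-token-ids scheme used by get_vocab_base_pre).
import hashlib
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-M2")
chktxt = "Hello world! 123"  # placeholder; the real probe string differs
chktok = tokenizer.encode(chktxt)
chkhsh = hashlib.sha256(str(chktok).encode()).hexdigest()
print(chkhsh)  # matches the hard-coded hash only with the real probe text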
@@ -7126,6 +7129,64 @@ def prepare_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("MiniMaxM2ForCausalLM")
+class MiniMaxM2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.MINIMAXM2
+    _experts_cache: dict[int, dict[str, Tensor]] = {}
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hparams["num_experts"] = self.hparams["num_local_experts"]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if self.hparams["scoring_func"] == "sigmoid":
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+        elif self.hparams["scoring_func"] == "softmax":
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
+        else:
+            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
+
+        self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
+        self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
+
+        # merge expert weights
+        if 'experts' in name:
+            n_experts = self.hparams["num_experts"]
+            assert bid is not None
+
+            expert_cache = self._experts_cache.setdefault(bid, {})
+            expert_cache[name] = data_torch
+            expert_weights = ["w1", "w2", "w3"]
+
+            # not enough expert weights to merge
+            if len(expert_cache) < n_experts * len(expert_weights):
+                return []
+
+            tensors: list[tuple[str, Tensor]] = []
+            for w_name in expert_weights:
+                datas: list[Tensor] = []
+
+                for xid in range(n_experts):
+                    ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w_name}.weight"
+                    datas.append(expert_cache[ename])
+                    del expert_cache[ename]
+
+                data_torch = torch.stack(datas, dim=0)
+                merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
+                new_name = self.map_tensor_name(merged_name)
+                tensors.append((new_name, data_torch))
+
+            del self._experts_cache[bid]
+            return tensors
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("Dots1ForCausalLM")
 class Dots1Model(Qwen2MoeModel):
     model_arch = gguf.MODEL_ARCH.DOTS1
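The merge step above buffers per-expert 2-D matrices until all w1/w2/w3 weights for a layer have arrived, then stacks each group into one 3-D tensor before mapping the merged name. A standalone sketch of that shape transformation, with made-up dimensions (not MiniMax-M2's actual sizes):

# Toy version of the expert merge: n_experts separate [d_out, d_in]
# matrices become a single [n_experts, d_out, d_in] tensor.
import torch

n_experts, d_out, d_in = 4, 8, 16  # hypothetical sizes for illustration
per_expert = [torch.randn(d_out, d_in) for _ in range(n_experts)]
merged = torch.stack(per_expert, dim=0)
print(merged.shape)  # torch.Size([4, 8, 16])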

convert_hf_to_gguf_update.py

Lines changed: 2 additions & 1 deletion
@@ -141,6 +141,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "mellum", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum-4b-base", },
     {"name": "bailingmoe2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-mini-base-2.0", },
     {"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
+    {"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions
@@ -435,7 +436,7 @@ def get_vocab_base_pre(self, tokenizer) -> str:
             tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
         else:
             tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-    except OSError as e:
+    except (OSError, TypeError) as e:
         logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
         continue  # Skip this model and continue with the next one in the loop
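The widened except clause lets the update loop survive repos whose AutoTokenizer.from_pretrained call raises TypeError rather than OSError; its pairing with the MiniMax-M2 addition suggests that repo triggered such a failure, though the commit does not say. The same skip-on-failure pattern in isolation, as a sketch (the one-entry list stands in for the full models registry):

# Skip any model whose tokenizer fails to load, whether the failure
# surfaces as an OSError or a TypeError.
from transformers import AutoTokenizer

repos = ["MiniMaxAI/MiniMax-M2"]  # stand-in for the full models list
for repo in repos:
    try:
        tokenizer = AutoTokenizer.from_pretrained(repo)
    except (OSError, TypeError) as e:
        print(f"skipping {repo}: {e}")
        continue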

ggml/src/ggml-cuda/common.cuh

Lines changed: 9 additions & 1 deletion
@@ -224,6 +224,11 @@ static const char * cu_get_error_str(CUresult err) {
 #define AMD_MFMA_AVAILABLE
 #endif // defined(GGML_USE_HIP) && defined(CDNA) && !defined(GGML_HIP_NO_MMQ_MFMA)
 
+// The Volta instructions are in principle available on Turing or newer but they are effectively unusable:
+#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ == GGML_CUDA_CC_VOLTA
+#define VOLTA_MMA_AVAILABLE
+#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ == GGML_CUDA_CC_VOLTA
+
 #if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
 #define TURING_MMA_AVAILABLE
 #endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
@@ -278,7 +283,10 @@ static bool amd_mfma_available(const int cc) {
 #endif //!defined(GGML_HIP_NO_MMQ_MFMA)
 }
 
-// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
+static bool volta_mma_available(const int cc) {
+    return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) == GGML_CUDA_CC_VOLTA;
+}
+
 static bool turing_mma_available(const int cc) {
     return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_TURING;
 }
