Skip to content

Commit 57201cc

Browse files
committed
Remove adding the mask token
1 parent 9691f4e commit 57201cc

File tree

2 files changed

+5
-11
lines changed

2 files changed

+5
-11
lines changed

common/common.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -283,8 +283,8 @@ struct common_params {
283283

284284
struct common_params_sampling sampling;
285285
struct common_params_speculative speculative;
286-
struct common_params_vocoder vocoder;
287-
struct common_params_diffusion diffusion;
286+
struct common_params_vocoder vocoder;
287+
struct common_params_diffusion diffusion;
288288

289289
struct common_params_model model;
290290

convert_hf_to_gguf.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2949,9 +2949,6 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
29492949
def set_vocab(self):
29502950
self._set_vocab_gpt2()
29512951

2952-
self.gguf_writer.add_add_bos_token(True)
2953-
self.gguf_writer.add_diffusion_shift_logits(False)
2954-
29552952
def set_gguf_parameters(self):
29562953
super().set_gguf_parameters()
29572954
self._try_set_pooling_type()
@@ -2979,13 +2976,10 @@ def set_gguf_parameters(self):
29792976

29802977
# LLaDA models use non-causal attention for diffusion, similar to Dream
29812978
self.gguf_writer.add_causal_attention(False)
2982-
# Handle RoPE scaling similar to LlamaModel and Dream
2983-
2984-
# Add LLaDA-specific parameters
2985-
mask_token_id = self.hparams.get("mask_token_id")
29862979

2987-
if mask_token_id is not None:
2988-
self.gguf_writer.add_mask_token_id(mask_token_id)
2980+
# LLaDA specific parameters
2981+
self.gguf_writer.add_add_bos_token(True)
2982+
self.gguf_writer.add_diffusion_shift_logits(False)
29892983

29902984
@staticmethod
29912985
def permute(weights: Tensor, n_head: int, n_head_kv: int | None):

0 commit comments

Comments
 (0)