Skip to content
4 changes: 2 additions & 2 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,8 @@ struct common_params {

struct common_params_sampling sampling;
struct common_params_speculative speculative;
struct common_params_vocoder vocoder;
struct common_params_diffusion diffusion;
struct common_params_vocoder vocoder;
struct common_params_diffusion diffusion;

struct common_params_model model;

Expand Down
12 changes: 3 additions & 9 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2949,9 +2949,6 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
def set_vocab(self):
self._set_vocab_gpt2()

self.gguf_writer.add_add_bos_token(True)
self.gguf_writer.add_diffusion_shift_logits(False)

def set_gguf_parameters(self):
super().set_gguf_parameters()
self._try_set_pooling_type()
Expand Down Expand Up @@ -2979,13 +2976,10 @@ def set_gguf_parameters(self):

# LLaDA models use non-causal attention for diffusion, similar to Dream
self.gguf_writer.add_causal_attention(False)
# Handle RoPE scaling similar to LlamaModel and Dream

# Add LLaDA-specific parameters
mask_token_id = self.hparams.get("mask_token_id")

if mask_token_id is not None:
self.gguf_writer.add_mask_token_id(mask_token_id)
# LLaDA specific parameters
self.gguf_writer.add_add_bos_token(True)
self.gguf_writer.add_diffusion_shift_logits(False)

@staticmethod
def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
Expand Down
Loading