Skip to content

Commit 40ab1ab

Browse files
committed
fix mul_mat_id out-of-bounds read
1 parent 9f05741 commit 40ab1ab

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

examples/tts/convert_csm_to_gguf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,10 @@ def rename_transformer(name: str) -> str:
179179
is_decoder = True
180180
name = "audio_head.weight"
181181
if component == "decoder":
182-
# add padding at the beginning so that build_lora_mm_id can be used
182+
# add padding at the beginning and the end so that build_lora_mm_id can be used
183183
zero_tensor = torch.zeros(1, 1024, 2051)
184-
data_torch = torch.cat([zero_tensor, data_torch], dim=0)
185-
assert data_torch.shape == (32, 1024, 2051)
184+
data_torch = torch.cat([zero_tensor, data_torch, zero_tensor], dim=0)
185+
assert data_torch.shape == (33, 1024, 2051)
186186
# then, transpose it
187187
data_torch = data_torch.transpose(1, 2)
188188

src/llama-model.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,15 +1662,15 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
16621662
{
16631663
// TODO: maybe store these in gguf metadata
16641664
int64_t csm_audio_cbook_size = 2051; // audio codebook size
1665-
int64_t csm_acoustic_tokens = 32; // equal to number of acoutic tokens for Mimi
1665+
int64_t csm_audio_tokens = 32; // equal to number of audio tokens for Mimi
16661666
//int64_t csm_n_audio_vocab = csm_audio_cbook_size*csm_acoustic_tokens;
16671667

16681668
csm_output_cbook = create_tensor(tn(LLM_TENSOR_CSM_CBOOK_OUTPUT, "weight"), {n_embd, csm_audio_cbook_size}, TENSOR_NOT_REQUIRED);
16691669

16701670
bool is_backbone = csm_output_cbook != nullptr;
16711671

16721672
csm_output_audio = is_backbone ? nullptr
1673-
: create_tensor(tn(LLM_TENSOR_CSM_AUDIO_OUTPUT, "weight"), {n_embd, csm_audio_cbook_size, csm_acoustic_tokens}, 0);
1673+
: create_tensor(tn(LLM_TENSOR_CSM_AUDIO_OUTPUT, "weight"), {n_embd, csm_audio_cbook_size, csm_audio_tokens+1}, 0);
16741674

16751675
tok_embd = is_backbone
16761676
? create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0)

0 commit comments

Comments
 (0)