Commit 98c0e98

Model conversion seems to have succeeded, but the tokenizer is not working

1 parent 694b963 commit 98c0e98
File tree

3 files changed (+117, −6 lines)

convert_hf_to_gguf.py

Lines changed: 100 additions & 6 deletions
@@ -2292,6 +2292,21 @@ def set_gguf_parameters(self):
         # Store which layers use full attention vs sliding window
         # This may need custom handling in llama.cpp
         pass
+
+        # Set layer block types for PLaMo2 hybrid architecture
+        # PLaMo2 alternates between mamba and attention layers
+        mamba_step = hparams.get("mamba_step", 2)
+        num_layers = hparams.get("num_hidden_layers", 32)
+
+        layer_types = []
+        for i in range(num_layers):
+            # Based on PLaMo2 architecture: even layers are mamba, odd layers are attention
+            if i % mamba_step == 0:
+                layer_types.append("mamba")
+            else:
+                layer_types.append("attention")
+
+        self.gguf_writer.add_array("plamo2.layers_block_type", layer_types)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # Handle Plamo2 specific tensor naming
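Note (illustration, not part of the commit): with the default mamba_step of 2, the loop above yields a strictly alternating layer list; a shortened, hypothetical configuration:

    # Illustration only: what the layer_types loop produces for a small, hypothetical
    # configuration (mamba_step = 2, eight layers instead of PLaMo2's default 32).
    mamba_step = 2
    num_layers = 8

    layer_types = ["mamba" if i % mamba_step == 0 else "attention" for i in range(num_layers)]
    print(layer_types)
    # ['mamba', 'attention', 'mamba', 'attention', 'mamba', 'attention', 'mamba', 'attention']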
@@ -2327,22 +2342,101 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
 
         # Handle Mamba-specific A_log tensor transformation
         if name.endswith(".A_log"):
-            # Map the tensor name first
+            # Map the A_log tensor directly to ssm_a
             new_name = self.map_tensor_name(name)
-            logger.debug(f"A_log --> A ==> {new_name}")
+            # Add .weight suffix if not present
+            if not new_name.endswith(".weight"):
+                new_name += ".weight"
+            logger.debug(f"A_log --> A ==> {new_name}, original shape: {data_torch.shape}")
+
             # Transform A_log to A: A = -exp(A_log)
             data_torch = -torch.exp(data_torch)
+
+            # PLaMo2 A_log is shape {d_state} but llama.cpp expects {d_state, d_inner}
+            # Expand the tensor to the correct shape
+            if len(data_torch.shape) == 1:
+                d_state = data_torch.shape[0]  # 64
+                d_inner = 8192  # SSM inner size for PLaMo2
+
+                # Create tensor with correct shape {d_state, d_inner} = {64, 8192}
+                # Each row of the matrix should contain the same value from the original 1D tensor
+                new_tensor = data_torch.new_zeros((d_state, d_inner))
+                for i in range(d_state):
+                    new_tensor[i, :] = data_torch[i]  # Broadcast the single value across the inner dimension
+                data_torch = new_tensor
+                logger.debug(f"Expanded A tensor from {d_state} to shape: {data_torch.shape}")
+
+            return [(new_name, data_torch)]
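Note (sketch, not from the commit): with a dummy tensor, the row-wise expansion loop above is equivalent to broadcasting the 1D -exp(A_log) values across the inner dimension; sizes are the ones hard-coded in the converter:

    # Sketch with a dummy tensor: the per-row loop is equivalent to a single broadcast.
    import torch

    d_state, d_inner = 64, 8192
    a_log = torch.randn(d_state)          # stand-in for the real A_log weights
    a = -torch.exp(a_log)                 # A = -exp(A_log)

    a_expanded = a.unsqueeze(1).expand(d_state, d_inner).contiguous()
    assert a_expanded.shape == (d_state, d_inner)
    assert torch.equal(a_expanded[:, 0], a)       # every column is a copy of A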
+
+        # Handle Mamba D tensor - ensure .weight suffix
+        if name.endswith("mixer.D") or name.endswith("ssm.D"):
+            new_name = self.map_tensor_name(name)
+            # Add .weight suffix if not present
+            if not new_name.endswith(".weight"):
+                new_name += ".weight"
+            logger.debug(f"D tensor ==> {new_name}")
             return [(new_name, data_torch)]
 
         # Handle Mamba conv1d tensor shape adjustment
-        if "mixer.conv1d" in name:
+        if "mixer.conv1d" in name or ".ssm.conv1d" in name:
             new_name = self.map_tensor_name(name)
-            # Squeeze the conv1d tensor if needed
-            if len(data_torch.shape) == 4:
+            # For PLaMo2 conv1d tensors, reshape from (kernel_size, 1, d_inner) to (d_inner, kernel_size)
+            if len(data_torch.shape) == 3 and data_torch.shape[1] == 1:
+                # PLaMo2 conv1d is (kernel_size, 1, d_inner), needs to be (d_inner, kernel_size)
+                data_torch = data_torch.squeeze(1).transpose(0, 1)
+            elif len(data_torch.shape) == 4:
+                # For other formats, squeeze and transpose as needed
                 data_torch = data_torch.squeeze()
+                if len(data_torch.shape) == 2:
+                    # If it ends up as (kernel_size, d_inner), transpose to (d_inner, kernel_size)
+                    if data_torch.shape[0] < data_torch.shape[1]:
+                        data_torch = data_torch.transpose(0, 1)
             return [(new_name, data_torch)]
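Note (sketch with illustrative sizes: the kernel size of 4 is an assumption, d_inner of 8192 matches the value used elsewhere in this commit): how the 3-D conv1d branch reshapes (kernel_size, 1, d_inner) into (d_inner, kernel_size):

    # Shape sketch for the (kernel_size, 1, d_inner) branch; values are illustrative.
    import torch

    kernel_size, d_inner = 4, 8192
    w = torch.randn(kernel_size, 1, d_inner)

    w2 = w.squeeze(1).transpose(0, 1)     # (kernel_size, d_inner) -> (d_inner, kernel_size)
    assert w2.shape == (d_inner, kernel_size)
    assert torch.equal(w2[:, 0], w[0, 0, :])      # kernel tap 0 becomes column 0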
 
-        return super().modify_tensors(data_torch, name, bid)
+        # Handle Mamba ssm_dt tensor shape adjustment
+        if "mixer.dt_proj" in name:
+            new_name = self.map_tensor_name(name)
+            logger.debug(f"Processing dt_proj tensor: {name} -> {new_name}, original shape: {data_torch.shape}")
+
+            # For PLaMo2 dt_proj tensors, original shape is (64, 256) but llama.cpp expects (256, 8192)
+            # The GGUF writer seems to transpose tensors, so we need to account for that.
+            # We want the final result to be (256, 8192) after GGUF transposition
+
+            # First transpose from (64, 256) to (256, 64)
+            if len(data_torch.shape) == 2 and data_torch.shape[0] == 64 and data_torch.shape[1] == 256:
+                data_torch = data_torch.transpose(0, 1)  # Now (256, 64)
+                logger.debug(f"Transposed dt_proj to shape: {data_torch.shape}")
+
+            # Expand the second dimension from 64 to 8192 (ssm_inner_size)
+            if len(data_torch.shape) == 2 and data_torch.shape[1] == 64:
+                # ssm_inner_size should be 8192 for PLaMo2 (64 heads * 128 dim_per_head)
+                expected_inner_size = 8192
+                repeat_factor = expected_inner_size // data_torch.shape[1]
+                data_torch = data_torch.repeat(1, repeat_factor)
+                logger.debug(f"Expanded dt_proj to shape: {data_torch.shape}")
+
+            # Since GGUF writer might transpose, we need to ensure we get (256, 8192) in the end
+            # If we currently have (256, 8192) and GGUF transposes to (8192, 256),
+            # we need to pre-transpose to (8192, 256) so GGUF ends up with (256, 8192)
+            if len(data_torch.shape) == 2 and data_torch.shape == torch.Size([256, 8192]):
+                data_torch = data_torch.transpose(0, 1)  # Pre-transpose to (8192, 256)
+                logger.debug(f"Pre-transposed dt_proj for GGUF writer: {data_torch.shape}")
+
+            return [(new_name, data_torch)]
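Note (sketch, not from the commit): chained together, the three conditionals walk a dummy dt_proj tensor through (64, 256) → (256, 64) → (256, 8192) → (8192, 256); repeat tiles the 64 columns 128 times:

    # Shape walk-through for dt_proj using a dummy tensor; sizes as hard-coded above.
    import torch

    dt = torch.randn(64, 256)                    # original dt_proj shape
    dt = dt.transpose(0, 1)                      # (256, 64)
    dt = dt.repeat(1, 8192 // dt.shape[1])       # tile columns 128x -> (256, 8192)
    dt = dt.transpose(0, 1)                      # pre-transpose for the GGUF writer -> (8192, 256)

    assert dt.shape == (8192, 256)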
+
+        # Fix tensor name mappings for PLaMo2 to match llama.cpp expectations
+        result = super().modify_tensors(data_torch, name, bid)
+        fixed_result = []
+        for tensor_name, tensor_data in result:
+            # Map PLaMo2-specific norm tensor names to match llama.cpp expectations
+            if ".attn_norm_2.weight" in tensor_name:
+                tensor_name = tensor_name.replace(".attn_norm_2.weight", ".post_attn_norm.weight")
+            elif ".post_ffw_norm.weight" in tensor_name:
+                tensor_name = tensor_name.replace(".post_ffw_norm.weight", ".post_mlp_norm.weight")
+
+            fixed_result.append((tensor_name, tensor_data))
+
+        return fixed_result
 
 
 @ModelBase.register("DeciLMForCausalLM")
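Note (hypothetical input name and block index): what the final renaming pass does to a single tensor name returned by the base class:

    # Hypothetical example of the rename pass applied to one tensor name.
    name = "blk.0.attn_norm_2.weight"
    if ".attn_norm_2.weight" in name:
        name = name.replace(".attn_norm_2.weight", ".post_attn_norm.weight")
    elif ".post_ffw_norm.weight" in name:
        name = name.replace(".post_ffw_norm.weight", ".post_mlp_norm.weight")

    print(name)   # blk.0.post_attn_norm.weight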

src/llama-arch.cpp

Lines changed: 13 additions & 0 deletions
@@ -767,6 +767,12 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_SSM_A,          "blk.%d.ssm_a" },
             { LLM_TENSOR_SSM_D,          "blk.%d.ssm_d" },
             { LLM_TENSOR_SSM_OUT,        "blk.%d.ssm_out" },
+
+            // PLaMo2-specific SSM norm weights and biases
+            { LLM_TENSOR_SSM_B_NORM,     "blk.%d.ssm_b_norm_weight" },
+            { LLM_TENSOR_SSM_C_NORM,     "blk.%d.ssm_c_norm_weight" },
+            { LLM_TENSOR_SSM_DT_NORM,    "blk.%d.ssm_dt_norm_weight" },
+            { LLM_TENSOR_SSM_DT_BIAS,    "blk.%d.ssm_dt_bias" },
         },
     },
     {
@@ -1650,6 +1656,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_GATE_INP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_IN,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SSM_BCDT,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
@@ -1674,6 +1681,10 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SSM_CONV1D,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SSM_A,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
     {LLM_TENSOR_SSM_D,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_B_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_C_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_DT_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_DT_BIAS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     {LLM_TENSOR_TIME_MIX_LERP_X,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LN,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_CHANNEL_MIX_LERP_K,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
@@ -1696,8 +1707,10 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_ATTN_NORM_2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_ATTN_OUT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_ATTN_POST_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_POST_ATTN_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_FFN_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_FFN_POST_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_POST_MLP_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_FFN_NORM_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_ATTN_Q_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_ATTN_K_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
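Note (illustration only, with an arbitrary block index): the new entries are C-style format strings, so the per-layer tensor name is obtained by substituting the block index:

    # Sketch: expanding two of the new per-layer name patterns for block index 0.
    for pattern in ("blk.%d.ssm_dt_bias", "blk.%d.ssm_b_norm_weight"):
        print(pattern % 0)
    # blk.0.ssm_dt_bias
    # blk.0.ssm_b_norm_weight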

src/llama-arch.h

Lines changed: 4 additions & 0 deletions
@@ -273,6 +273,10 @@ enum llm_tensor {
     LLM_TENSOR_SSM_A,
     LLM_TENSOR_SSM_D,
     LLM_TENSOR_SSM_OUT,
+    LLM_TENSOR_SSM_B_NORM,
+    LLM_TENSOR_SSM_C_NORM,
+    LLM_TENSOR_SSM_DT_NORM,
+    LLM_TENSOR_SSM_DT_BIAS,
     LLM_TENSOR_TIME_MIX_W0,
     LLM_TENSOR_TIME_MIX_W1,
     LLM_TENSOR_TIME_MIX_W2,
