Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit cc8b4d6

Browse files
committed
update model config for gguf
1 parent 4b666a7 commit cc8b4d6

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

torchchat/utils/gguf_loader.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -542,15 +542,19 @@ def load_model(gguf_file: str) -> torch.nn.Module:
     assert arch == "llama", "Only LLaMa models are supported by this converter."

     model_args = ModelArgs(
-        TransformerArgs(
-            dim=metadata[f"{arch}.embedding_length"],
-            n_layers=metadata[f"{arch}.block_count"],
-            n_heads=metadata[f"{arch}.attention.head_count"],
-            n_local_heads=metadata[f"{arch}.attention.head_count_kv"],
-            vocab_size=len(metadata["tokenizer.ggml.tokens"]),
-            norm_eps=metadata[f"{arch}.attention.layer_norm_rms_epsilon"],
-            hidden_dim=metadata[f"{arch}.feed_forward_length"],
-        )
+        {
+            "text": {
+                "config": {
+                    "dim": metadata[f"{arch}.embedding_length"],
+                    "n_layers": metadata[f"{arch}.block_count"],
+                    "n_heads": metadata[f"{arch}.attention.head_count"],
+                    "n_local_heads": metadata[f"{arch}.attention.head_count_kv"],
+                    "vocab_size": len(metadata["tokenizer.ggml.tokens"]),
+                    "norm_eps": metadata[f"{arch}.attention.layer_norm_rms_epsilon"],
+                    "hidden_dim": metadata[f"{arch}.feed_forward_length"],
+                }
+            }
+        }
     )

     # TODO: what to do with rope args like

0 commit comments

Comments
 (0)