Commit 3f2839e

Add OPT model support

- Add OPT architecture support in C++ code
- Implement OPT-specific graph builder with separate Q/K/V projections
- Add OPT model conversion support in Python
- Add OPT tensor mappings and constants in gguf-py
- Support some OPT model sizes
- Tested with OPT-125M and OPT-13B models
1 parent 9012eb9 commit 3f2839e

6 files changed: +261, -5 lines changed

convert_hf_to_gguf.py
Lines changed: 30 additions & 4 deletions

@@ -803,6 +803,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
             # ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
             res = "seed-coder"
+        if chkhsh == "7f2212c1b7fec62b4b75447509a4ecc8acd82813ce90d715dd99c1460a52d978":
+            # ref: https://huggingface.co/facebook/opt-13b
+            res = "gpt-2"
+        if chkhsh == "2c934e5e1c8275b75011b9942836389a87eaa1a63116104e52424515e7649c46":
+            # ref: https://huggingface.co/SousChef/OPT-13B-Erebus (OPT-13B-Erebus model)
+            res = "gpt-2"
 
         if res is None:
             logger.warning("\n")
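For context, the chkhsh values matched above are fingerprints of a checkpoint's tokenizer: get_vocab_base_pre encodes a fixed probe string and hashes the resulting token IDs, so any checkpoint whose tokenizer splits text the same way resolves to the same pre-tokenizer. The two new entries route facebook/opt-13b and the Erebus finetune to the existing "gpt-2" handling. A minimal sketch of the fingerprinting idea (the probe string and helper name here are illustrative, not the converter's exact internals):

```python
from hashlib import sha256
from transformers import AutoTokenizer

def tokenizer_fingerprint(model_dir: str, probe_text: str) -> str:
    # Encode a fixed probe string and hash the token IDs; two checkpoints whose
    # tokenizers behave identically on the probe produce the same digest.
    tok = AutoTokenizer.from_pretrained(model_dir)
    ids = tok.encode(probe_text)
    return sha256(str(ids).encode()).hexdigest()

# Illustrative usage: an OPT checkpoint should hash to one of the values added above.
print(tokenizer_fingerprint("facebook/opt-13b", "Hello world! 123"))
```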
@@ -3902,7 +3908,7 @@ def set_vocab(self):
     def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
-
+
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -4014,7 +4020,7 @@ def set_gguf_parameters(self):
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
-
+
         if name.startswith("language_model."):
             name = name.replace("language_model.", "")
 
@@ -4520,7 +4526,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             data_torch = LlamaModel.permute(data_torch, n_head, n_head)
         if name.endswith("k_proj.weight"):
             data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
-
+
         return [(self.map_tensor_name(name), data_torch)]
 
 
@@ -5231,7 +5237,7 @@ def set_gguf_parameters(self):
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
-
+
         # T5 based models contain shared token embeddings tensors saved randomly as either "encoder.embed_tokens.weight",
         # "decoder.embed_tokens.weight" or "shared.weight" tensor. In some models there are even multiple of them stored
         # in the safetensors files. We use the first tensor from these three as the token embeddings for both encoder
@@ -6124,6 +6130,26 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
         return cls._wrap_fn(func)(*args, **kwargs)
 
 
+@ModelBase.register("OPTForCausalLM")
+class OPTModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.OPT
+
+    def set_vocab(self):
+        # OPT typically uses GPT2 tokenizer
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        # OPT-specific parameters that are not handled by the base class
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # OPT model uses standard tensor mapping - let the mapping handle the conversion
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Convert a huggingface model to a GGML compatible file")
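The @ModelBase.register("OPTForCausalLM") decorator is what lets the converter pick OPTModel when a checkpoint's config.json lists that architecture. A simplified, self-contained sketch of the dispatch pattern (the registry name and lookup below are illustrative, not the converter's actual attributes):

```python
from typing import Callable

# Illustrative stand-in for the registry that ModelBase maintains internally.
MODEL_REGISTRY: dict[str, type] = {}

def register(*architectures: str) -> Callable[[type], type]:
    # Class decorator: map each Hugging Face architecture string to a converter class.
    def wrapper(cls: type) -> type:
        for arch in architectures:
            MODEL_REGISTRY[arch] = cls
        return cls
    return wrapper

@register("OPTForCausalLM")
class OPTModelSketch:
    """Placeholder standing in for the real OPTModel above."""

# During conversion, the architecture string read from config.json selects the class.
arch_from_config = "OPTForCausalLM"
print(MODEL_REGISTRY[arch_from_config].__name__)  # -> OPTModelSketch
```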

gguf-py/gguf/constants.py
Lines changed: 16 additions & 0 deletions

@@ -340,6 +340,7 @@ class MODEL_ARCH(IntEnum):
     WAVTOKENIZER_DEC = auto()
     PLM = auto()
     BAILINGMOE = auto()
+    OPT = auto()
 
 
 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -620,6 +621,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM: "plm",
     MODEL_ARCH.BAILINGMOE: "bailingmoe",
+    MODEL_ARCH.OPT: "opt",
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2040,6 +2042,20 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
+    MODEL_ARCH.OPT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     # TODO
 }
 
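With the constants above, downstream tooling can resolve the OPT architecture name and the tensor kinds it is expected to carry directly from gguf-py. A small check along these lines should work against this revision (a hedged sketch; run where the gguf package from gguf-py is importable):

```python
from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS, TENSOR_NAMES

# Architecture string written into the GGUF metadata for OPT models.
print(MODEL_ARCH_NAMES[MODEL_ARCH.OPT])  # "opt"

# Tensor kinds the OPT graph expects, with their GGUF name templates.
for tensor in MODEL_TENSORS[MODEL_ARCH.OPT]:
    print(tensor.name, "->", TENSOR_NAMES[tensor])  # e.g. ATTN_Q -> blk.{bid}.attn_q
```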

gguf-py/gguf/tensor_mapping.py
Lines changed: 12 additions & 1 deletion

@@ -31,6 +31,7 @@ class TensorNameMap:
             "model.embeddings", # rwkv7
             "model.word_embeddings", # bailingmoe
             "language_model.model.embed_tokens", # llama4
+            "model.decoder.embed_tokens", # opt
         ),
 
         # Token type embeddings
@@ -56,6 +57,7 @@ class TensorNameMap:
             "transformer.wpe", # gpt2
             "embeddings.position_embeddings", # bert
             "wpe", # gpt2
+            "model.decoder.embed_positions", # opt
         ),
 
         # Output
@@ -68,7 +70,7 @@ class TensorNameMap:
             "output_layer", # chatglm
             "head", # rwkv
             "head.out", # wavtokenizer
-            "lm_head", # llama4
+            "lm_head", # llama4 opt
         ),
 
         # Output norm
@@ -92,6 +94,7 @@ class TensorNameMap:
             "model.ln_out", # rwkv7
             "backbone.final_layer_norm", # wavtokenizer
             "model.norm", # llama4
+            "model.decoder.final_layer_norm", # opt
         ),
 
         # Rope frequencies
@@ -134,6 +137,7 @@ class TensorNameMap:
             "rwkv.blocks.{bid}.ln1", # rwkv6
             "model.layers.{bid}.ln1", # rwkv7
             "model.layers.{bid}.input_layernorm", # llama4
+            "model.decoder.layers.{bid}.self_attn_layer_norm", # opt
         ),
 
         # Attention norm 2
@@ -174,6 +178,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
             "model.layers.{bid}.self_attn.q_proj", # llama4
+            "model.decoder.layers.{bid}.self_attn.q_proj", # opt
         ),
 
         # Attention key
@@ -189,6 +194,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
             "model.layers.{bid}.self_attn.k_proj", # llama4
+            "model.decoder.layers.{bid}.self_attn.k_proj", # opt
         ),
 
         # Attention value
@@ -203,6 +209,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
             "transformer.h.{bid}.attn.attention.v_proj", # exaone
             "model.layers.{bid}.self_attn.v_proj", # llama4
+            "model.decoder.layers.{bid}.self_attn.v_proj", # opt
         ),
 
         # Attention output
@@ -230,6 +237,7 @@ class TensorNameMap:
             "transformer.layers.{bid}.attn.out_proj", # openelm
             "transformer.h.{bid}.attn.attention.out_proj", # exaone
             "model.layers.{bid}.self_attn.o_proj", # llama4
+            "model.decoder.layers.{bid}.self_attn.out_proj", # opt
         ),
 
         # Attention output norm
@@ -269,6 +277,7 @@ class TensorNameMap:
             "encoder.layers.{bid}.post_attention_layernorm", # chatglm
             "transformer.layers.{bid}.ffn_norm", # openelm
             "model.layers.{bid}.post_attention_layernorm", # llama4
+            "model.decoder.layers.{bid}.final_layer_norm", # opt
         ),
 
         # Post feed-forward norm
@@ -330,6 +339,7 @@ class TensorNameMap:
             "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
             "transformer.h.{bid}.mlp.c_fc_1", # exaone
             "model.layers.{bid}.feed_forward.up_proj", # llama4
+            "model.decoder.layers.{bid}.fc1", # opt
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -411,6 +421,7 @@ class TensorNameMap:
             "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
             "model.layers.h.{bid}.mlp.c_proj", # exaone
             "model.layers.{bid}.feed_forward.down_proj", # llama4
+            "model.decoder.layers.{bid}.fc2", # opt
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
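With these mappings, the converter's generic map_tensor_name path resolves OPT's Hugging Face tensor names to GGUF names, which is why OPTModel.modify_tensors needs no special cases. A quick sanity check along these lines should work (a sketch assuming gguf-py's get_tensor_name_map and TensorNameMap.get_name helpers, which the converter uses under the hood):

```python
import gguf

# Build the name map for OPT; the block count only controls how many per-layer
# ("blk.N.*") entries are instantiated.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.OPT, 12)

hf_name = "model.decoder.layers.0.self_attn.q_proj.weight"
base = tmap.get_name(hf_name, try_suffixes=(".weight", ".bias"))
print(base)  # expected: "blk.0.attn_q"; the converter re-appends the ".weight" suffix
```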

src/llama-arch.cpp
Lines changed: 18 additions & 0 deletions

@@ -72,6 +72,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
     { LLM_ARCH_PLM, "plm" },
     { LLM_ARCH_BAILINGMOE, "bailingmoe" },
+    { LLM_ARCH_OPT, "opt" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };
 
@@ -1530,6 +1531,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
         },
     },
+    {
+        LLM_ARCH_OPT,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_POS_EMBD, "position_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_BAILINGMOE,
         {

src/llama-arch.h
Lines changed: 1 addition & 0 deletions

@@ -76,6 +76,7 @@ enum llm_arch {
     LLM_ARCH_WAVTOKENIZER_DEC,
     LLM_ARCH_PLM,
     LLM_ARCH_BAILINGMOE,
+    LLM_ARCH_OPT,
     LLM_ARCH_UNKNOWN,
 };
 
