Skip to content

Commit 13e6d73

Browse files
committed
py : add XLMRobertaForSequenceClassification [no ci]
1 parent acb2c32 commit 13e6d73

File tree

3 files changed: 20 additions and 1 deletion

3 files changed: 20 additions and 1 deletion

convert_hf_to_gguf.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2598,7 +2598,7 @@ def set_gguf_parameters(self):
25982598
self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
25992599

26002600

2601-
@Model.register("XLMRobertaModel")
2601+
@Model.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
26022602
class XLMRobertaModel(BertModel):
26032603
model_arch = gguf.MODEL_ARCH.BERT
26042604

@@ -2701,6 +2701,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
27012701
if self._position_offset is not None:
27022702
data_torch = data_torch[self._position_offset:,:]
27032703

2704+
# if name starts with "roberta.", remove the prefix
2705+
# e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
2706+
if name.startswith("roberta."):
2707+
name = name[8:]
2708+
27042709
return super().modify_tensors(data_torch, name, bid)
27052710

27062711

gguf-py/gguf/constants.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,8 @@ class MODEL_TENSOR(IntEnum):
338338
ENC_FFN_DOWN = auto()
339339
ENC_FFN_UP = auto()
340340
ENC_OUTPUT_NORM = auto()
341+
CLS = auto() # classifier
342+
CLS_OUT = auto() # classifier output projection
341343

342344

343345
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -494,6 +496,8 @@ class MODEL_TENSOR(IntEnum):
494496
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
495497
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
496498
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
499+
MODEL_TENSOR.CLS: "cls",
500+
MODEL_TENSOR.CLS_OUT: "cls.output",
497501
}
498502

499503
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -603,6 +607,8 @@ class MODEL_TENSOR(IntEnum):
603607
MODEL_TENSOR.FFN_DOWN,
604608
MODEL_TENSOR.FFN_UP,
605609
MODEL_TENSOR.LAYER_OUT_NORM,
610+
MODEL_TENSOR.CLS,
611+
MODEL_TENSOR.CLS_OUT,
606612
],
607613
MODEL_ARCH.NOMIC_BERT: [
608614
MODEL_TENSOR.TOKEN_EMBD,

gguf-py/gguf/tensor_mapping.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -677,6 +677,14 @@ class TensorNameMap:
677677
MODEL_TENSOR.ENC_OUTPUT_NORM: (
678678
"encoder.final_layer_norm", # t5
679679
),
680+
681+
MODEL_TENSOR.CLS: (
682+
"classifier.dense", # roberta
683+
),
684+
685+
MODEL_TENSOR.CLS_OUT: (
686+
"classifier.out_proj", # roberta
687+
),
680688
}
681689

682690
# architecture-specific block mappings

0 commit comments

Comments
 (0)