@@ -3695,6 +3695,10 @@ def set_gguf_parameters(self):
36953695 self .gguf_writer .add_causal_attention (False )
36963696 self ._try_set_pooling_type ()
36973697
3698+ if cls_out_labels := self .hparams .get ("id2label" ):
3699+ key_name = gguf .Keys .Classifier .OUTPUT_LABELS .format (arch = gguf .MODEL_ARCH_NAMES [self .model_arch ])
3700+ self .gguf_writer .add_array (key_name , [v for k , v in sorted (cls_out_labels .items ())])
3701+
36983702 def set_vocab (self ):
36993703 tokens , toktypes , tokpre = self .get_vocab_base ()
37003704 self .vocab_size = len (tokens )
@@ -3745,12 +3749,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
37453749 if name .startswith ("cls.seq_relationship" ):
37463750 return []
37473751
3748- # For BertForSequenceClassification (direct projection layer)
3749- if name == "classifier.weight" :
3750- name = "classifier.out_proj.weight"
3752+ if self .hparams .get ("id2label" ):
3753+ # For BertForSequenceClassification (direct projection layer)
3754+ if name == "classifier.weight" :
3755+ name = "classifier.out_proj.weight"
37513756
3752- if name == "classifier.bias" :
3753- name = "classifier.out_proj.bias"
3757+ if name == "classifier.bias" :
3758+ name = "classifier.out_proj.bias"
37543759
37553760 return [(self .map_tensor_name (name ), data_torch )]
37563761
@@ -3846,7 +3851,7 @@ def _xlmroberta_set_vocab(self) -> None:
38463851 self .gguf_writer .add_add_eos_token (True )
38473852
38483853
3849- @ModelBase .register ("RobertaModel" )
3854+ @ModelBase .register ("RobertaModel" , "RobertaForSequenceClassification" )
38503855class RobertaModel (BertModel ):
38513856 model_arch = gguf .MODEL_ARCH .BERT
38523857
0 commit comments