@@ -72,6 +72,7 @@ class ModelBase:
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    dry_run: bool
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -111,6 +112,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager or (remote_hf_model_id is not None)
+        self.dry_run = dry_run
         self.remote_hf_model_id = remote_hf_model_id
         if remote_hf_model_id is not None:
             self.is_safetensors = True
@@ -4871,11 +4873,35 @@ def modify_tensors(self, data_torch, name, bid):
 @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
+    _lora_files = {}
+    _lora_names = []
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model, False)
+
+        if lora_names := hparams.get("lora_adaptations"):
+            self._lora_names = lora_names
+            self.model_arch = gguf.MODEL_ARCH.JINA_BERT_V3
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
         self._xlmroberta_tokenizer_init()
 
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if self._lora_names:
+            for name in self._lora_names:
+                fname = self.add_prefix_to_filename(self.fname_out, f"lora-{name}-")
+                self._lora_files[name] = gguf.GGUFWriter(fname, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, dry_run=self.dry_run)
+
+        return super().generate_extra_tensors()
+
+    def set_type(self):
+        for lora_writer in self._lora_files.values():
+            lora_writer.add_type(gguf.GGUFType.ADAPTER)
+            lora_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+        super().set_type()
+
     def set_vocab(self):
         self._xlmroberta_set_vocab()
 
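Aside (not part of the diff): generate_extra_tensors above opens one additional gguf.GGUFWriter per entry in the checkpoint's lora_adaptations list, deriving each adapter's file name by prefixing "lora-<task>-" to the main output name. A minimal sketch of that naming, assuming add_prefix_to_filename simply prepends the prefix to the file name; the output name and task names below are illustrative:

from pathlib import Path

def add_prefix_to_filename(path: Path, prefix: str) -> Path:
    # assumed behavior: keep the directory, prepend the prefix to the file name
    return path.with_name(prefix + path.name)

fname_out = Path("jina-embeddings-v3-f16.gguf")    # hypothetical main output
for task in ("retrieval.query", "text-matching"):  # illustrative task names
    print(add_prefix_to_filename(fname_out, f"lora-{task}-"))
# -> lora-retrieval.query-jina-embeddings-v3-f16.gguf
# -> lora-text-matching-jina-embeddings-v3-f16.gguf
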
@@ -4885,13 +4911,62 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if name.startswith("roberta."):
             name = name[8:]
 
+        # jina-embeddings-v3
+        if ".parametrizations." in name:
+            name = name.replace(".parametrizations.", ".")
+            if name.endswith(".original"):
+                name = name[:-9]
+
         # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
         if name == "embeddings.position_embeddings.weight":
             if self._position_offset is not None:
                 data_torch = data_torch[self._position_offset:,:]
 
+        if name.endswith(".0.lora_A") or name.endswith(".0.lora_B"):
+            if name.startswith("pooler.dense"):
+                return []
+
+            num_loras = data_torch.size(0)
+            assert num_loras == len(self._lora_names)
+
+            # Split out each LoRA in their own GGUF
+            for i, lora_writer in enumerate(self._lora_files.values()):
+                new_name = self.map_tensor_name(name[:-9]) + name[-7:].lower()
+                data = data_torch[i, :, :]
+                # Transpose/flip token_embd/types into correct shape
+                if new_name == "token_embd.weight.lora_b":
+                    data = data.T
+                elif new_name.startswith("token_types.weight."):
+                    new_name = new_name[:-1] + ("a" if new_name[-1:] == "b" else "b")
+                lora_writer.add_tensor(new_name, data.float().numpy(), raw_dtype=gguf.GGMLQuantizationType.F32)
+
+            return []
+
         return super().modify_tensors(data_torch, name, bid)
 
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        # jina-embeddings-v3
+        if rotary_emb_base := self.hparams.get("rotary_emb_base"):
+            self.gguf_writer.add_rope_freq_base(rotary_emb_base)
+        lora_alpha = self.hparams.get("lora_alpha")
+        if lora_prompt_prefixes := self.hparams.get("task_instructions"):
+            assert self._lora_files and all(lora_name in lora_prompt_prefixes for lora_name in self._lora_files.keys())
+        for lora_name, lora_writer in self._lora_files.items():
+            lora_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, lora_alpha if lora_alpha is not None else 1.0)
+            lora_writer.add_string(gguf.Keys.Adapter.LORA_TASK_NAME, lora_name)
+            if lora_prompt_prefixes:
+                lora_writer.add_string(gguf.Keys.Adapter.LORA_PROMPT_PREFIX, lora_prompt_prefixes[lora_name])
+
+    def write(self):
+        super().write()
+        for lora_writer in self._lora_files.values():
+            lora_writer.write_header_to_file()
+            lora_writer.write_kv_data_to_file()
+            lora_writer.write_tensors_to_file(progress=True)
+            lora_writer.close()
+
 
 @ModelBase.register("GemmaForCausalLM")
 class GemmaModel(TextModel):
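
A note on the modify_tensors change above, as a toy sketch rather than the model's real shapes: jina-embeddings-v3 stores all task adapters stacked in single lora_A/lora_B tensors, so the converter slices out index i for the i-th adapter file and writes that slice as F32. Shapes and task names below are made up for illustration:

import torch

task_names = ["retrieval.query", "text-matching", "classification"]  # illustrative only
# stand-in for one stacked LoRA tensor, laid out as (num_loras, rank, hidden);
# the real shapes come from the checkpoint
stacked_lora_a = torch.randn(len(task_names), 4, 8)

for i, task in enumerate(task_names):
    per_task = stacked_lora_a[i, :, :]   # the slice written to this task's adapter GGUF
    print(task, tuple(per_task.shape))   # -> (4, 8) for every task

Each adapter file additionally carries the adapter metadata set in set_type and set_gguf_parameters (adapter type, LoRA alpha, task name, optional prompt prefix), which write() then flushes alongside the tensors after the base model GGUF is written.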