@@ -72,6 +72,7 @@ class ModelBase:
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    dry_run: bool
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -109,6 +110,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager or (remote_hf_model_id is not None)
+        self.dry_run = dry_run
         self.remote_hf_model_id = remote_hf_model_id
         if remote_hf_model_id is not None:
             self.is_safetensors = True
@@ -5188,11 +5190,35 @@ def modify_tensors(self, data_torch, name, bid):
 @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
+    _lora_files = {}
+    _lora_names = []
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model, False)
+
+        if lora_names := hparams.get("lora_adaptations"):
+            self._lora_names = lora_names
+            self.model_arch = gguf.MODEL_ARCH.JINA_BERT_V3
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
         self._xlmroberta_tokenizer_init()
 
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if self._lora_names:
+            for name in self._lora_names:
+                fname = self.add_prefix_to_filename(self.fname_out, f"lora-{name}-")
+                self._lora_files[name] = gguf.GGUFWriter(fname, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, dry_run=self.dry_run)
+
+        return super().generate_extra_tensors()
+
+    def set_type(self):
+        for lora_writer in self._lora_files.values():
+            lora_writer.add_type(gguf.GGUFType.ADAPTER)
+            lora_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+        super().set_type()
+
     def set_vocab(self):
         self._xlmroberta_set_vocab()
 
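Aside (not part of the commit): when the checkpoint's hparams contain "lora_adaptations" (as jina-embeddings-v3's config.json does), the converter switches the arch to JINA_BERT_V3 and opens one extra gguf.GGUFWriter per adaptation, so each task-specific LoRA lands in its own file next to the base model GGUF. A minimal sketch of the resulting file naming, assuming add_prefix_to_filename() simply prepends the prefix to the output file name and using illustrative task names (both are assumptions, not taken from the commit):

    # Sketch only: local stand-in for ModelBase.add_prefix_to_filename(); assumed behavior.
    from pathlib import Path

    def add_prefix_to_filename(path: Path, prefix: str) -> Path:
        return path.with_name(prefix + path.name)

    fname_out = Path("jina-embeddings-v3-f16.gguf")           # example base output
    lora_names = ["retrieval.query", "retrieval.passage"]     # example adaptation names
    for name in lora_names:
        print(add_prefix_to_filename(fname_out, f"lora-{name}-"))
    # -> lora-retrieval.query-jina-embeddings-v3-f16.gguf
    # -> lora-retrieval.passage-jina-embeddings-v3-f16.gguf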
@@ -5202,13 +5228,62 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if name.startswith("roberta."):
             name = name[8:]
 
+        # jina-embeddings-v3
+        if ".parametrizations." in name:
+            name = name.replace(".parametrizations.", ".")
+            if name.endswith(".original"):
+                name = name[:-9]
+
         # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
         if name == "embeddings.position_embeddings.weight":
             if self._position_offset is not None:
                 data_torch = data_torch[self._position_offset:,:]
 
+        if name.endswith(".0.lora_A") or name.endswith(".0.lora_B"):
+            if name.startswith("pooler.dense"):
+                return []
+
+            num_loras = data_torch.size(0)
+            assert num_loras == len(self._lora_names)
+
+            # Split out each LoRA in their own GGUF
+            for i, lora_writer in enumerate(self._lora_files.values()):
+                new_name = self.map_tensor_name(name[:-9]) + name[-7:].lower()
+                data = data_torch[i, :, :]
+                # Transpose/flip token_embd/types into correct shape
+                if new_name == "token_embd.weight.lora_b":
+                    data = data.T
+                elif new_name.startswith("token_types.weight."):
+                    new_name = new_name[:-1] + ("a" if new_name[-1:] == "b" else "b")
+                lora_writer.add_tensor(new_name, data.float().numpy(), raw_dtype=gguf.GGMLQuantizationType.F32)
+
+            return []
+
         return super().modify_tensors(data_torch, name, bid)
 
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        # jina-embeddings-v3
+        if rotary_emb_base := self.hparams.get("rotary_emb_base"):
+            self.gguf_writer.add_rope_freq_base(rotary_emb_base)
+        lora_alpha = self.hparams.get("lora_alpha")
+        if lora_prompt_prefixes := self.hparams.get("task_instructions"):
+            assert self._lora_files and all(lora_name in lora_prompt_prefixes for lora_name in self._lora_files.keys())
+        for lora_name, lora_writer in self._lora_files.items():
+            lora_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, lora_alpha if lora_alpha is not None else 1.0)
+            lora_writer.add_string(gguf.Keys.Adapter.LORA_TASK_NAME, lora_name)
+            if lora_prompt_prefixes:
+                lora_writer.add_string(gguf.Keys.Adapter.LORA_PROMPT_PREFIX, lora_prompt_prefixes[lora_name])
+
+    def write(self):
+        super().write()
+        for lora_writer in self._lora_files.values():
+            lora_writer.write_header_to_file()
+            lora_writer.write_kv_data_to_file()
+            lora_writer.write_tensors_to_file(progress=True)
+            lora_writer.close()
+
 
 @ModelBase.register("GemmaForCausalLM")
 class GemmaModel(TextModel):
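Aside (not part of the commit): jina-embeddings-v3 ships its per-task LoRA weights stacked, so each "*.0.lora_A"/"*.0.lora_B" tensor carries one slice per adaptation in its leading dimension; modify_tensors() above slices that dimension and routes slice i to the i-th per-task GGUF, transposing the token-embedding lora_b and swapping the token-types lora_a/lora_b names on the way. A minimal sketch of just the slicing step, with made-up shapes and task names:

    # Sketch only: shapes and task names are illustrative, not taken from the model.
    import torch

    num_loras, rank, dim = 3, 4, 8
    stacked_lora_a = torch.randn(num_loras, rank, dim)        # like a "*.0.lora_A" tensor
    lora_names = ["retrieval.query", "retrieval.passage", "text-matching"]
    assert stacked_lora_a.size(0) == len(lora_names)

    for i, name in enumerate(lora_names):
        data = stacked_lora_a[i, :, :]                        # one adapter's 2-D LoRA matrix
        print(name, tuple(data.shape))                        # -> (4, 8)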