@@ -64,6 +64,7 @@ class ModelBase:
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    dry_run: bool
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -98,6 +99,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager or (remote_hf_model_id is not None)
+        self.dry_run = dry_run
         self.remote_hf_model_id = remote_hf_model_id
         if remote_hf_model_id is not None:
             self.is_safetensors = True
@@ -4153,18 +4155,31 @@ def modify_tensors(self, data_torch, name, bid):
 @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
+    _lora_files = {}
 
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
         hparams = kwargs.pop("hparams", None)
         if hparams is None:
             hparams = ModelBase.load_hparams(dir_model)
 
-        if hparams.get("lora_adaptations"):
+        if lora_names := hparams.get("lora_adaptations"):
             self.model_arch = gguf.MODEL_ARCH.JINA_BERT_V3
 
         super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
+
+        if lora_names:
+            for name in lora_names:
+                fname = self.add_prefix_to_filename(self.fname_out, f"lora-{name}-")
+                self._lora_files[name] = gguf.GGUFWriter(fname, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, dry_run=self.dry_run)
+
         self._xlmroberta_tokenizer_init()
 
+    def set_type(self):
+        for lora_writer in self._lora_files.values():
+            lora_writer.add_type(gguf.GGUFType.ADAPTER)
+            lora_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+        super().set_type()
+
     def set_vocab(self):
         self._xlmroberta_set_vocab()
 
@@ -4185,36 +4200,52 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             if self._position_offset is not None:
                 data_torch = data_torch[self._position_offset:,:]
 
-        if name.endswith(".weight.0.lora_A") or name.endswith(".weight.0.lora_B"):
+        if name.endswith(".0.lora_A") or name.endswith(".0.lora_B"):
             if name.startswith("pooler.dense"):
-                return
+                return []
 
-            lora_name = self.hparams["lora_adaptations"]
             num_loras = data_torch.size(0)
-            assert num_loras == len(lora_name)
+            assert num_loras == len(self._lora_files)
+
+            # Split out each LoRA in their own GGUF
+            for i, lora_writer in enumerate(self._lora_files.values()):
+                new_name = self.map_tensor_name(name[:-9]) + name[-7:].lower()
+                data_qtype = gguf.GGMLQuantizationType.F32
+                data = data_torch[i, :, :]
+                # Transpose/flip token_embd/types into correct shape
+                if new_name == "token_embd.weight.lora_b":
+                    data = data.T
+                elif new_name.startswith("token_types.weight."):
+                    new_name = new_name[:-1] + ("a" if new_name[-1:] == "b" else "b")
+                data = gguf.quants.quantize(data.numpy(), data_qtype)
+                lora_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
 
-            # Split out each LoRA in their own named tensors
-            # Remove "weight" from the name to not confuse quantize
-            for i in range(num_loras):
-                data_lora = data_torch[i, :, :]
-                yield (self.map_tensor_name(name[:-16]) + name[-16:].lower().replace("weight.0.", f"<{lora_name[i]}>"), data_lora)
-            return
+            return []
 
-        yield from super().modify_tensors(data_torch, name, bid)
+        return super().modify_tensors(data_torch, name, bid)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
 
         # jina-embeddings-v3
         if rotary_emb_base := self.hparams.get("rotary_emb_base"):
             self.gguf_writer.add_rope_freq_base(rotary_emb_base)
-        if lora_alpha := self.hparams.get("lora_alpha"):
-            self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, lora_alpha)
-        if lora_names := self.hparams.get("lora_adaptations"):
-            self.gguf_writer.add_array(gguf.Keys.Adapter.LORA_NAMES, lora_names)
+        lora_alpha = self.hparams.get("lora_alpha")
         if lora_prompt_prefixes := self.hparams.get("task_instructions"):
-            assert lora_names and all(lora_name in lora_prompt_prefixes for lora_name in lora_names)
-            self.gguf_writer.add_array(gguf.Keys.Adapter.LORA_PROMPT_PREFIXES, [lora_prompt_prefixes[lora_name] for lora_name in lora_names])
+            assert self._lora_files and all(lora_name in lora_prompt_prefixes for lora_name in self._lora_files.keys())
+        for lora_name, lora_writer in self._lora_files.items():
+            lora_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, lora_alpha if lora_alpha is not None else 1.0)
+            lora_writer.add_string(gguf.Keys.Adapter.LORA_TASK_NAME, lora_name)
+            if lora_prompt_prefixes:
+                lora_writer.add_string(gguf.Keys.Adapter.LORA_PROMPT_PREFIX, lora_prompt_prefixes[lora_name])
+
+    def write(self):
+        super().write()
+        for lora_writer in self._lora_files.values():
+            lora_writer.write_header_to_file()
+            lora_writer.write_kv_data_to_file()
+            lora_writer.write_tensors_to_file(progress=True)
+            lora_writer.close()
 
 
 @ModelBase.register("GemmaForCausalLM")
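
A quick way to sanity-check the per-task adapter GGUFs this change writes (one file per entry in `lora_adaptations`, named with the `lora-<name>-` prefix) is to read them back with gguf-py. The sketch below is illustrative only and not part of the commit; the filename is hypothetical, and it assumes the gguf-py package from this repo (including the adapter keys used in the diff above) is importable.

```python
# Verification sketch (assumption: gguf-py from this repo is on PYTHONPATH).
from gguf import GGUFReader

def dump_adapter_gguf(path: str) -> None:
    reader = GGUFReader(path)
    # List the adapter-related KV pairs written by set_type()/set_gguf_parameters()
    for key, field in reader.fields.items():
        if key.startswith("adapter."):
            print(key, field.types)
    # The split-out LoRA A/B matrices written by modify_tensors() are plain tensors
    for tensor in reader.tensors:
        if tensor.name.endswith(".lora_a") or tensor.name.endswith(".lora_b"):
            print(tensor.name, tensor.shape, tensor.tensor_type)

if __name__ == "__main__":
    dump_adapter_gguf("lora-retrieval.query-model-f16.gguf")  # hypothetical output name
```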