From c6396aa4bba8a508b262cc9fb8baada75f620db8 Mon Sep 17 00:00:00 2001
From: Victor Oluwadare <111367022+Victoran0@users.noreply.github.com>
Date: Tue, 8 Oct 2024 02:35:08 +0100
Subject: [PATCH 1/2] Added support for SFTTrainer checkpoint models and
 adapter models containing some non-LoRA weights

The previous code triggers the "Unexpected name" error and calls sys.exit(1)
(lines 350-351, current version) even if only a single weight in the
lora_model is not a lora_A, lora_B, or base-layer weight. This edit collects
the names of all LoRA weights in the model before the for loop at line 341
(current version). Then, at line 350 (edited version), the subsequent
operations are performed only on the LoRA and base-layer weights, ignoring
any non-LoRA weights in the lora_model. Hopefully, this allows the script to
extract the LoRA weights and convert LoRA to GGUF for adapters containing one
or more non-LoRA weights.
---
 convert_lora_to_gguf.py | 51 +++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index 439a78de108ca..61a945bd3ad5f 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -338,28 +338,39 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
     def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
         tensor_map: dict[str, PartialLoraTensor] = {}
 
+        # The following edits will enable conversion for: SFTTrainer checkpoint adapter models and other adapter models that contain weights besides LoRA weights
+
+        # Here, we first get the items with the 'lora_' substring
+        lora_model_items_name = [name for name,_ in lora_model.items()]
+        lora_model_items_with_lora_tensors = [name for name in lora_model_items_name if 'lora_' in name]
+
         for name, tensor in lora_model.items():
-            if self.lazy:
-                tensor = LazyTorchTensor.from_eager(tensor)
-            base_name = get_base_tensor_name(name)
-            is_lora_a = ".lora_A.weight" in name
-            is_lora_b = ".lora_B.weight" in name
-            if not is_lora_a and not is_lora_b:
-                if ".base_layer.weight" in name:
-                    continue
-                logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
-                sys.exit(1)
-
-            if base_name in tensor_map:
-                if is_lora_a:
-                    tensor_map[base_name].A = tensor
-                else:
-                    tensor_map[base_name].B = tensor
-            else:
-                if is_lora_a:
-                    tensor_map[base_name] = PartialLoraTensor(A=tensor)
+
+            # Check for only LoRA finetuned weights and base layer weights
+            if (name in lora_model_items_with_lora_tensors) or (".base_layer.weight" in name):
+                if self.lazy:
+                    tensor = LazyTorchTensor.from_eager(tensor)
+                base_name = get_base_tensor_name(name)
+                is_lora_a = ".lora_A.weight" in name
+                is_lora_b = ".lora_B.weight" in name
+                if not is_lora_a and not is_lora_b:
+                    if ".base_layer.weight" in name:
+                        continue
+
+                    # we will either have a lora weight or a base layer weight, this error becomes trivial
+                    # logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+                    # sys.exit(1)
+
+                if base_name in tensor_map:
+                    if is_lora_a:
+                        tensor_map[base_name].A = tensor
+                    else:
+                        tensor_map[base_name].B = tensor
                 else:
-                    tensor_map[base_name] = PartialLoraTensor(B=tensor)
+                    if is_lora_a:
+                        tensor_map[base_name] = PartialLoraTensor(A=tensor)
+                    else:
+                        tensor_map[base_name] = PartialLoraTensor(B=tensor)
 
         for name, tensor in tensor_map.items():
             assert tensor.A is not None
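As a quick, self-contained illustration of the filtering idea in the patch
above (a sketch only, not code from convert_lora_to_gguf.py; the tensor names
and placeholder values are hypothetical), the same name collection and
membership test can be run on a toy state dict:

# Sketch of the name filter from PATCH 1/2; the tensor names here are made up.
lora_model = {
    "base_model.model.layers.0.self_attn.q_proj.lora_A.weight": "A",
    "base_model.model.layers.0.self_attn.q_proj.lora_B.weight": "B",
    "v_head.summary.weight": "extra",  # hypothetical non-LoRA weight that previously caused sys.exit(1)
}

# Collect every tensor name containing the 'lora_' substring, as the patch does.
lora_names = [name for name in lora_model if "lora_" in name]

for name in lora_model:
    if (name in lora_names) or (".base_layer.weight" in name):
        print("convert:", name)  # handled by the LoRA A/B and base-layer logic
    else:
        print("skip:", name)     # non-LoRA weight is ignored instead of aborting

Running this prints "convert:" for the two LoRA tensors and "skip:" for the
extra weight.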
From c2c2626ec66c312440468a4c89addb8dd14282b8 Mon Sep 17 00:00:00 2001
From: Victor Oluwadare <111367022+Victoran0@users.noreply.github.com>
Date: Tue, 8 Oct 2024 20:31:43 +0100
Subject: [PATCH 2/2] Added support for SFTTrainer checkpoint models and
 adapter models containing one or more non-LoRA weights

My initial commit was more of a brute-force approach. The edits suggested by
@FirstTimeEZ reduce the complexity.
---
 convert_lora_to_gguf.py | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index 61a945bd3ad5f..26de8b1ca69b0 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -338,16 +338,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
     def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
         tensor_map: dict[str, PartialLoraTensor] = {}
 
-        # The following edits will enable conversion for: SFTTrainer checkpoint adapter models and other adapter models that contain weights besides LoRA weights
-
-        # Here, we first get the items with the 'lora_' substring
-        lora_model_items_name = [name for name,_ in lora_model.items()]
-        lora_model_items_with_lora_tensors = [name for name in lora_model_items_name if 'lora_' in name]
-
         for name, tensor in lora_model.items():
-
-            # Check for only LoRA finetuned weights and base layer weights
-            if (name in lora_model_items_with_lora_tensors) or (".base_layer.weight" in name):
+            if ("lora_" in name) or (".base_layer.weight" in name):
                 if self.lazy:
                     tensor = LazyTorchTensor.from_eager(tensor)
                 base_name = get_base_tensor_name(name)
@@ -356,11 +348,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 if not is_lora_a and not is_lora_b:
                     if ".base_layer.weight" in name:
                         continue
-
-                    # we will either have a lora weight or a base layer weight, this error becomes trivial
-                    # logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
-                    # sys.exit(1)
-
+
                 if base_name in tensor_map:
                     if is_lora_a:
                         tensor_map[base_name].A = tensor
@@ -371,6 +359,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                         tensor_map[base_name] = PartialLoraTensor(A=tensor)
                     else:
                         tensor_map[base_name] = PartialLoraTensor(B=tensor)
+            else:
+                pass
 
         for name, tensor in tensor_map.items():
             assert tensor.A is not None
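The simplified check in the final patch can be read as a single predicate over
tensor names. The short sketch below (illustrative only, with hypothetical
tensor names, not code from the repository) shows which keys pass it:

# Sketch of the per-name check from PATCH 2/2; the names below are made up.
def is_convertible(name: str) -> bool:
    # Keep LoRA A/B weights and base-layer weights; everything else is skipped,
    # e.g. extra tensors saved alongside an SFTTrainer checkpoint.
    return ("lora_" in name) or (".base_layer.weight" in name)

assert is_convertible("model.layers.0.mlp.down_proj.lora_A.weight")
assert is_convertible("model.layers.0.mlp.down_proj.base_layer.weight")
assert not is_convertible("v_head.summary.weight")  # hypothetical non-LoRA key

Because the condition is evaluated per name, the pre-built list from the first
patch is unnecessary, and the trailing "else: pass" branch is a no-op.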