Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class Model:
gguf_writer: gguf.GGUFWriter
model_name: str | None
metadata_override: Path | None
dir_model_card: Path

# subclasses should define this!
model_arch: gguf.MODEL_ARCH
Expand Down Expand Up @@ -90,6 +91,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path |
self.tensor_names = None
self.metadata_override = metadata_override
self.model_name = model_name
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py

# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
if self.ftype == gguf.LlamaFileType.GUESSED:
Expand Down Expand Up @@ -345,7 +347,7 @@ def prepare_metadata(self, vocab_only: bool):

total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()

self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, total_params)
self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, self.dir_model_card, total_params)

# Fallback to model directory name if metadata name is still missing
if self.metadata.name is None:
Expand Down
24 changes: 17 additions & 7 deletions convert_lora_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,6 @@ def parse_args() -> argparse.Namespace:
# load base model
logger.info(f"Loading base model: {dir_base_model.name}")
hparams = Model.load_hparams(dir_base_model)

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha: float = lparams["lora_alpha"]

with torch.inference_mode():
try:
model_class = Model.from_model_architecture(hparams["architectures"][0])
Expand All @@ -320,12 +314,21 @@ def parse_args() -> argparse.Namespace:
class LoraModel(model_class):
model_arch = model_class.model_arch

lora_alpha: float

def __init__(self, *args, dir_lora_model: Path, lora_alpha: float, **kwargs):

super().__init__(*args, **kwargs)

self.dir_model_card = dir_lora_model
self.lora_alpha = float(lora_alpha)

def set_type(self):
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

def set_gguf_parameters(self):
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
super().set_gguf_parameters()

def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
Expand Down Expand Up @@ -368,6 +371,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
yield (dest_name + ".lora_a", lora_a)
yield (dest_name + ".lora_b", lora_b)

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha: float = lparams["lora_alpha"]

model_instance = LoraModel(
dir_base_model,
ftype,
Expand All @@ -376,6 +384,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
use_temp_file=False,
eager=args.no_lazy,
dry_run=args.dry_run,
dir_lora_model=dir_lora,
lora_alpha=alpha,
)

logger.info("Exporting model...")
Expand Down
34 changes: 25 additions & 9 deletions gguf-py/gguf/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,22 @@ class Metadata:
datasets: Optional[list[str]] = None

@staticmethod
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, model_card_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
# This grabs as much contextual authorship metadata as possible from the model repository
# making any conversion as required to match the gguf kv store metadata format
# as well as giving users the ability to override any authorship metadata that may be incorrect

# Create a new Metadata instance
metadata = Metadata()

model_card = Metadata.load_model_card(model_path)
if model_card_path is None:
model_card_path = model_path

model_card = Metadata.load_model_card(model_card_path)
hf_params = Metadata.load_hf_parameters(model_path)

# heuristics
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_card_path, total_params)

# Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp
Expand Down Expand Up @@ -177,6 +180,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
org_component = None

name_parts: list[str] = model_full_name_component.split('-')

# Remove empty parts
for i in reversed(range(len(name_parts))):
if len(name_parts[i]) == 0:
del name_parts[i]

name_types: list[
set[Literal["basename", "size_label", "finetune", "version", "type"]]
] = [set() for _ in name_parts]
Expand Down Expand Up @@ -227,6 +236,13 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
if part.lower() == "lora":
name_parts[i] = "LoRA"

# Ignore word-based size labels when at least one number-based size label is present
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
for n, t in zip(name_parts, name_types):
if "size_label" in t:
if all(c.isalpha() for c in n):
t.remove("size_label")

at_start = True
# Find the basename through the annotated name
for part, t in zip(name_parts, name_types):
Expand All @@ -240,18 +256,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =

# Remove the basename annotation from trailing version
for part, t in zip(reversed(name_parts), reversed(name_types)):
if "basename" in t:
if len(t) > 1:
t.remove("basename")
if "basename" in t and len(t) > 1:
t.remove("basename")
else:
break

basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
size_label = "-".join(s for s, t in zip(name_parts, name_types) if "size_label" in t) or None
# Deduplicate size labels using an order-preserving 'dict' (a 'set' does not preserve insertion order)
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
# TODO: should the basename version always be excluded?
# TODO: should multiple versions be joined together?
version = ([v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t] or [None])[-1]
# NOTE: multiple finetune versions are joined together
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None

if size_label is None and finetune is None and version is None:
# Too ambiguous, output nothing
Expand Down
34 changes: 31 additions & 3 deletions gguf-py/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_get_model_id_components(self):
self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"),
('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, '8B'))

# Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing...
# Non standard naming
self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"),
('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B'))

Expand All @@ -71,7 +71,7 @@ def test_get_model_id_components(self):
self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3),
('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50K'))

# None standard and not easy to disambiguate
# Non standard and not easy to disambiguate
self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"),
('DeepSeek-Coder-V2-Lite-Instruct', None, 'DeepSeek-Coder-V2-Lite', 'Instruct', None, None))

Expand Down Expand Up @@ -123,6 +123,34 @@ def test_get_model_id_components(self):
self.assertEqual(gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"),
('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B'))

# Ignore full-text size labels when there are number-based ones, and deduplicate size labels
self.assertEqual(gguf.Metadata.get_model_id_components("MaziyarPanahi/GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1"),
('GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1', 'MaziyarPanahi', 'GreenNode-mini', 'multilingual-v1olet-Mistral-Instruct', 'v0.1', '7B'))

# Instruct in a name without a size label
self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/Mistral-Nemo-Instruct-2407"),
('Mistral-Nemo-Instruct-2407', 'mistralai', 'Mistral-Nemo', 'Instruct', '2407', None))

# Non-obvious splitting relying on 'chat' keyword
self.assertEqual(gguf.Metadata.get_model_id_components("deepseek-ai/DeepSeek-V2-Chat-0628"),
('DeepSeek-V2-Chat-0628', 'deepseek-ai', 'DeepSeek-V2', 'Chat', '0628', None))

# Multiple versions
self.assertEqual(gguf.Metadata.get_model_id_components("OpenGVLab/Mini-InternVL-Chat-2B-V1-5"),
('Mini-InternVL-Chat-2B-V1-5', 'OpenGVLab', 'Mini-InternVL', 'Chat', 'V1-5', '2B'))

# Too ambiguous
# TODO: should "base" be a 'finetune' or 'size_label'?
# (in this case it should be a size label, but other models use it to signal that they are not finetuned)
self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Florence-2-base"),
('Florence-2-base', 'microsoft', None, None, None, None))

## Invalid cases ##

# Starts with a dash and has multiple dashes in a row
self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/-Mistral--Nemo-Base-2407-"),
('-Mistral--Nemo-Base-2407-', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))

def test_apply_metadata_heuristic_from_model_card(self):
model_card = {
'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
Expand All @@ -134,7 +162,7 @@ def test_apply_metadata_heuristic_from_model_card(self):
}
got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
expect = gguf.Metadata()
expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': 'v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}]
expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': '14-v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}]
expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl']
expect.languages=['en']
expect.datasets=['teknium/OpenHermes-2.5']
Expand Down