From 683308a988369b5b9ccb4284a4454aca947857f6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 28 Feb 2025 16:28:52 +0100 Subject: [PATCH 1/3] convert : fix Norway problem when parsing YAML --- gguf-py/gguf/metadata.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 962c27b204464..75608a1683992 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -121,19 +121,39 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: if not model_card_path.is_file(): return {} - # The model card metadata is assumed to always be in YAML + # The model card metadata is assumed to always be in YAML (frontmatter) # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473 + yaml_content: str = "" with open(model_card_path, "r", encoding="utf-8") as f: - if f.readline() == "---\n": - raw = f.read().partition("---\n")[0] - data = yaml.safe_load(raw) - if isinstance(data, dict): - return data + content = f.read() + lines = content.splitlines() + lines_yaml = [] + if len(lines) == 0: + # Empty file + return {} + if len(lines) > 0 and lines[0] != "---": + # No frontmatter + return {} + for line in lines[1:]: + if line == "---": + break # End of frontmatter else: - logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict") - return {} + lines_yaml.append(line) + yaml_content = "\n".join(lines_yaml) + + # Quick hack to fix the Norway problem + # https://hitchdev.com/strictyaml/why/implicit-typing-removed/ + yaml_content = yaml_content.replace("- no\n", "- \"no\"\n") + + if yaml_content: + data = yaml.safe_load(yaml_content) + if isinstance(data, dict): + return data else: + logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict") return {} + else: + return {} @staticmethod def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: From f6c25d9c589b5abe12763d27fd4e80d7bd2d72fc Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Fri, 28 Feb 2025 17:01:04 +0100 Subject: [PATCH 2/3] Update gguf-py/gguf/metadata.py --- gguf-py/gguf/metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 75608a1683992..0629306bb7ce2 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -143,6 +143,7 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: # Quick hack to fix the Norway problem # https://hitchdev.com/strictyaml/why/implicit-typing-removed/ + yaml_content += "\n" yaml_content = yaml_content.replace("- no\n", "- \"no\"\n") if yaml_content: From bd40850d028a2ee43170d9ae6986b84d345b1adc Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 28 Feb 2025 17:20:48 +0100 Subject: [PATCH 3/3] add newline at correct place --- gguf-py/gguf/metadata.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 0629306bb7ce2..e807f434689de 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -139,11 +139,10 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: break # End of frontmatter else: lines_yaml.append(line) - yaml_content = "\n".join(lines_yaml) + yaml_content = "\n".join(lines_yaml) + "\n" # Quick hack to fix the Norway problem # https://hitchdev.com/strictyaml/why/implicit-typing-removed/ - yaml_content += "\n" yaml_content = yaml_content.replace("- no\n", "- \"no\"\n") if yaml_content: