@@ -102,7 +102,7 @@ def _create_model_card(
102102
103103def load_pretrained (
104104 folder_or_repo_path : str | Path , token : str | None = None
105- ) -> tuple [np .ndarray , Tokenizer , dict [str , Any ]]:
105+ ) -> tuple [np .ndarray , Tokenizer , dict [str , Any ], dict [ str , Any ] ]:
106106 """
107107 Loads a pretrained model from a folder.
108108
@@ -111,7 +111,7 @@ def load_pretrained(
111111 - If the local path is not found, we will attempt to load from the huggingface hub.
112112 :param token: The huggingface token to use.
113113 :raises: FileNotFoundError if the folder exists, but the file does not exist locally.
114- :return: The embeddings, tokenizer, and config .
114+ :return: The embeddings, tokenizer, config, and metadata .
115115
116116 """
117117 folder_or_repo_path = Path (folder_or_repo_path )
@@ -133,6 +133,10 @@ def load_pretrained(
133133 if not tokenizer_path .exists ():
134134 raise FileNotFoundError (f"Tokenizer file does not exist in { folder_or_repo_path } " )
135135
136+ # README is optional, so this is a bit finicky.
137+ readme_path = folder_or_repo_path / "README.md"
138+ metadata = _get_metadata_from_readme (readme_path )
139+
136140 else :
137141 logger .info ("Folder does not exist locally, attempting to use huggingface hub." )
138142 try :
@@ -148,6 +152,13 @@ def load_pretrained(
148152 # Raise original exception.
149153 raise e
150154
155+ try :
156+ readme_path = huggingface_hub .hf_hub_download (folder_or_repo_path .as_posix (), "README.md" , token = token )
157+ metadata = _get_metadata_from_readme (Path (readme_path ))
158+ except huggingface_hub .utils .EntryNotFoundError :
159+ logger .info ("No README found in the model folder. No model card loaded." )
160+ metadata = {}
161+
151162 config_path = huggingface_hub .hf_hub_download (folder_or_repo_path .as_posix (), "config.json" , token = token )
152163 tokenizer_path = huggingface_hub .hf_hub_download (folder_or_repo_path .as_posix (), "tokenizer.json" , token = token )
153164
@@ -162,7 +173,19 @@ def load_pretrained(
162173 f"Number of tokens does not match number of embeddings: `{ len (tokenizer .get_vocab ())} ` vs `{ len (embeddings )} `"
163174 )
164175
165- return embeddings , tokenizer , config
176+ return embeddings , tokenizer , config , metadata
177+
178+
def _get_metadata_from_readme(readme_path: Path) -> dict[str, Any]:
    """
    Read model card metadata out of a README file.

    :param readme_path: Location of the README file to inspect.
    :return: The dictionary produced by the model card's data section. An empty
        dictionary is returned when no file exists at `readme_path`.

    """
    if readme_path.exists():
        # Parse the README as a model card and flatten its data section to a dict.
        metadata: dict[str, Any] = ModelCard.load(readme_path).data.to_dict()
        if metadata:
            return metadata
        # The file is present but yielded no metadata: log it, then hand back
        # the (falsy) result unchanged.
        logger.info("File README.md exists, but was empty. No model card loaded.")
        return metadata

    # A missing README is not an error; callers simply get no metadata.
    logger.info(f"README file not found in { readme_path } . No model card loaded.")
    return {}
166189
167190
168191def push_folder_to_hub (folder_path : Path , repo_id : str , private : bool , token : str | None ) -> None :
0 commit comments