@@ -284,20 +284,67 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
284284 ########################
285285 if model_card is not None :
286286
287- if "model_name" in model_card and metadata .name is None :
288- # Not part of huggingface model card standard but notice some model creator using it
289- # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
290- metadata .name = model_card .get ("model_name" )
287+ def use_model_card_metadata (metadata_key : str , model_card_key : str ):
288+ if model_card_key in model_card and getattr (metadata , metadata_key , None ) is None :
289+ setattr (metadata , metadata_key , model_card .get (model_card_key ))
291290
292- if "model_creator" in model_card and metadata .author is None :
293- # Not part of huggingface model card standard but notice some model creator using it
294- # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
295- metadata .author = model_card .get ("model_creator" )
291+ def use_array_model_card_metadata (metadata_key : str , model_card_key : str ):
292+ # Note: Appends to the existing list rather than replacing it if a value already exists
293+ tags_value = model_card .get (model_card_key , None )
294+ if tags_value is None :
295+ return
296296
297- if "model_type" in model_card and metadata .basename is None :
298- # Not part of huggingface model card standard but notice some model creator using it
299- # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
300- metadata .basename = model_card .get ("model_type" )
297+ current_value = getattr (metadata , metadata_key , None )
298+ if current_value is None :
299+ current_value = []
300+
301+ if isinstance (tags_value , str ):
302+ current_value .append (tags_value )
303+ elif isinstance (tags_value , list ):
304+ current_value .extend (tags_value )
305+
306+ setattr (metadata , metadata_key , current_value )
307+
308+ # LLAMA.cpp's direct internal convention
309+ # (Definitely not part of hugging face formal/informal standard)
310+ #########################################
311+ use_model_card_metadata ("name" , "name" )
312+ use_model_card_metadata ("author" , "author" )
313+ use_model_card_metadata ("version" , "version" )
314+ use_model_card_metadata ("organization" , "organization" )
315+ use_model_card_metadata ("description" , "description" )
316+ use_model_card_metadata ("finetune" , "finetune" )
317+ use_model_card_metadata ("basename" , "basename" )
318+ use_model_card_metadata ("size_label" , "size_label" )
319+ use_model_card_metadata ("source_url" , "url" )
320+ use_model_card_metadata ("source_doi" , "doi" )
321+ use_model_card_metadata ("source_uuid" , "uuid" )
322+ use_model_card_metadata ("source_repo_url" , "repo_url" )
323+
324+ # LLAMA.cpp's huggingface style convention
325+ # (Definitely not part of hugging face formal/informal standard... but with a model_ prefix to match their style)
326+ ###########################################
327+ use_model_card_metadata ("name" , "model_name" )
328+ use_model_card_metadata ("author" , "model_author" )
329+ use_model_card_metadata ("version" , "model_version" )
330+ use_model_card_metadata ("organization" , "model_organization" )
331+ use_model_card_metadata ("description" , "model_description" )
332+ use_model_card_metadata ("finetune" , "model_finetune" )
333+ use_model_card_metadata ("basename" , "model_basename" )
334+ use_model_card_metadata ("size_label" , "model_size_label" )
335+ use_model_card_metadata ("source_url" , "model_url" )
336+ use_model_card_metadata ("source_doi" , "model_doi" )
337+ use_model_card_metadata ("source_uuid" , "model_uuid" )
338+ use_model_card_metadata ("source_repo_url" , "model_repo_url" )
339+
340+ # Hugging Face Direct Convention
341+ #################################
342+
343+ # Not part of the huggingface model card standard, but some model creators use these keys,
344+ # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
345+ use_model_card_metadata ("name" , "model_name" )
346+ use_model_card_metadata ("author" , "model_creator" )
347+ use_model_card_metadata ("basename" , "model_type" )
301348
302349 if "base_model" in model_card :
303350 # This represents the parent models that this is based on
@@ -329,58 +376,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
329376 base_model ["repo_url" ] = f"https://huggingface.co/{ org_component } /{ model_full_name_component } "
330377 metadata .base_models .append (base_model )
331378
332- if "license" in model_card and metadata .license is None :
333- metadata .license = model_card .get ("license" )
334-
335- if "license_name" in model_card and metadata .license_name is None :
336- metadata .license_name = model_card .get ("license_name" )
337-
338- if "license_link" in model_card and metadata .license_link is None :
339- metadata .license_link = model_card .get ("license_link" )
340-
341- tags_value = model_card .get ("tags" , None )
342- if tags_value is not None :
343-
344- if metadata .tags is None :
345- metadata .tags = []
346-
347- if isinstance (tags_value , str ):
348- metadata .tags .append (tags_value )
349- elif isinstance (tags_value , list ):
350- metadata .tags .extend (tags_value )
351-
352- pipeline_tags_value = model_card .get ("pipeline_tag" , None )
353- if pipeline_tags_value is not None :
354-
355- if metadata .tags is None :
356- metadata .tags = []
357-
358- if isinstance (pipeline_tags_value , str ):
359- metadata .tags .append (pipeline_tags_value )
360- elif isinstance (pipeline_tags_value , list ):
361- metadata .tags .extend (pipeline_tags_value )
362-
363- language_value = model_card .get ("languages" , model_card .get ("language" , None ))
364- if language_value is not None :
365-
366- if metadata .languages is None :
367- metadata .languages = []
368-
369- if isinstance (language_value , str ):
370- metadata .languages .append (language_value )
371- elif isinstance (language_value , list ):
372- metadata .languages .extend (language_value )
379+ use_model_card_metadata ("license" , "license" )
380+ use_model_card_metadata ("license_name" , "license_name" )
381+ use_model_card_metadata ("license_link" , "license_link" )
373382
374- dataset_value = model_card . get ( "datasets " , model_card . get ( "dataset" , None ) )
375- if dataset_value is not None :
383+ use_array_model_card_metadata ( "tags " , "tags" )
384+ use_array_model_card_metadata ( "tags" , "pipeline_tag" )
376385
377- if metadata . datasets is None :
378- metadata . datasets = []
386+ use_array_model_card_metadata ( "languages" , "languages" )
387+ use_array_model_card_metadata ( "languages" , "language" )
379388
380- if isinstance (dataset_value , str ):
381- metadata .datasets .append (dataset_value )
382- elif isinstance (dataset_value , list ):
383- metadata .datasets .extend (dataset_value )
389+ use_array_model_card_metadata ("datasets" , "datasets" )
390+ use_array_model_card_metadata ("datasets" , "dataset" )
384391
385392 # Hugging Face Parameter Heuristics
386393 ####################################
0 commit comments