1- """Simple wrappers for Hugging Face model search/detail retrieval that mirror
2- Dataset‑handlers style (single mapping helper + thin search/get functions).
3- """
41from __future__ import annotations
52
63from objects import thing , CreativeWork , Author
74from sources import data_retriever
85import utils
96from main import app
107
8+ import requests
119
12- def map_entry_to_model (record ) -> CreativeWork :
13- """Convert a single Huggingface model record into a :class:`CreativeWork`."""
10+
11+ def map_entry_to_model (record , request_readme : bool = False ) -> CreativeWork :
12+ """Convert a single Hugging Face model record into a `CreativeWork` object."""
1413
1514 model = CreativeWork () # thing -> CreativeWork
1615
@@ -19,7 +18,20 @@ def map_entry_to_model(record) -> CreativeWork:
1918 model .additionalType = "MODEL"
2019 model .url = f"https://huggingface.co/{ model .name } "
2120
22- model .description = utils .remove_html_tags (record .get ("description" , "" ))
21+ # model descriptions are usually contained in a README file, which we will request separately
22+ if request_readme :
23+ readme_url = f"https://huggingface.co/{ model .name } /raw/main/README.md"
24+ try :
25+ response = requests .get (readme_url , timeout = 5 )
26+ if response .status_code == 200 :
27+ model .description = utils .remove_html_tags (response .text )
28+ else :
29+ model .description = utils .remove_html_tags (record .get ("description" , "" ))
30+ except requests .RequestException :
31+ model .description = utils .remove_html_tags (record .get ("description" , "" ))
32+ else :
33+ model .description = utils .remove_html_tags (record .get ("description" , "" ))
34+
2335 model .abstract = model .description
2436 model .dateCreated = record .get ("createdAt" , "" )
2537 model .datePublished = model .dateCreated
@@ -59,7 +71,7 @@ def map_entry_to_model(record) -> CreativeWork:
5971
6072@utils .handle_exceptions
6173def search (source : str , search_term : str , results , failed_sources ):
62- """Populate * results['resources']* with models matching *search_term*."""
74+ """Populate results['resources'] with models matching *search_term*."""
6375 search_result = data_retriever .retrieve_data (
6476 source = source ,
6577 base_url = app .config ["DATA_SOURCES" ][source ].get ("search-endpoint" , "" ),
@@ -74,7 +86,8 @@ def search(source: str, search_term: str, results, failed_sources):
7486 utils .log_event (type = "info" , message = f"{ source } - { total_hits } records matched" )
7587
7688 for hit in search_result :
77- model = map_entry_to_model (hit )
89+ # Skip the README request here: an extra HTTP call per hit would slow search and raise API volume.
90+ model = map_entry_to_model (hit , request_readme = False )
7891 results ["resources" ].append (model )
7992
8093
@@ -89,7 +102,7 @@ def get_resource(source: str, source_id: str, identifier: str):
89102 )
90103
91104 if search_result :
92- model = map_entry_to_model (search_result )
105+ model = map_entry_to_model (search_result , request_readme = True )
93106 utils .log_event (type = "info" , message = f"{ source } - retrieved model details" )
94107 return model
95108 else :
0 commit comments