11#!/usr/bin/python
22# -*- coding: utf-8 -*-
3- """Plugin to evaluate AI4EOSC models for FAIR EVA, enhanced with detailed provenance
3+ """
4+ Plugin to evaluate AI4EOSC models for FAIR EVA, enhanced with detailed provenance
45metadata.
56
67This plugin fetches metadata and provenance RDF for AI4EOSC models and flattens both
4546
4647
4748def _any_url_uses_http (urls ):
48- """Return True if any URL in the iterable uses http/https."""
49+ """
50+ Return True if any URL in the iterable uses http/https.
51+ """
4952 for u in urls :
5053 try :
5154 if urlparse (str (u )).scheme in HTTP_OK_SCHEMES :
@@ -56,20 +59,25 @@ def _any_url_uses_http(urls):
5659
5760
5861def _normalize (s : str ) -> str :
59- """Normalize a string by stripping and lowering."""
62+ """
63+ Normalize a string by stripping and lowering.
64+ """
6065 return (s or "" ).strip ().lower ()
6166
6267
6368def _strip_spdx_suffix (u : str ) -> str :
64- """Strip common suffixes (.html/.json) from SPDX URLs."""
69+ """
70+ Strip common suffixes (.html/.json) from SPDX URLs.
71+ """
6572 u = u .strip ()
6673 return re .sub (r"\.(html|json)$" , "" , u , flags = re .IGNORECASE )
6774
6875
6976def _build_spdx_indexes (
7077 spdx_obj : Dict ,
7178) -> Tuple [Dict [str , str ], Dict [str , str ], Dict [str , str ]]:
72- """Build three indexes to resolve user inputs to SPDX detailsUrl.
79+ """
80+ Build three indexes to resolve user inputs to SPDX detailsUrl.
7381
7482 - by licenseId
7583 - by reference (canonical HTML)
@@ -94,7 +102,8 @@ def _build_spdx_indexes(
94102
95103
96104def _load_spdx_licenses (spdx_licenses_json = None , spdx_path : str = None ) -> Dict :
97- """Load the SPDX License List JSON object.
105+ """
106+ Load the SPDX License List JSON object.
98107
99108 You can:
100109 - pass 'spdx_licenses_json' already parsed (dict),
@@ -112,7 +121,9 @@ def _load_spdx_licenses(spdx_licenses_json=None, spdx_path: str = None) -> Dict:
112121
113122
114123def _collect_urls_from_metadata (df , fields_like = None ):
115- """Extract URLs from self.metadata rows (element/text_value/qualifier)."""
124+ """
125+ Extract URLs from self.metadata rows (element/text_value/qualifier).
126+ """
116127 urls = []
117128 if df is None or len (df ) == 0 :
118129 return urls
@@ -128,23 +139,29 @@ def _collect_urls_from_metadata(df, fields_like=None):
128139
129140
130141def _has_github_repo (df ):
131- """Check if any collected URL looks like a GitHub repo."""
142+ """
143+ Check if any collected URL looks like a GitHub repo.
144+ """
132145 for u in _collect_urls_from_metadata (df ):
133146 if GITHUB_RE .search (u ):
134147 return True , u
135148 return False , None
136149
137150
138151def _fetch (url , timeout = 15 , session = None ):
139- """Fetch a URL with optional provided session."""
152+ """
153+ Fetch a URL with optional provided session.
154+ """
140155 s = session or requests .Session ()
141156 r = s .get (url , timeout = timeout , allow_redirects = True )
142157 r .raise_for_status ()
143158 return r
144159
145160
146161def _extract_jsonld_from_html (html_text ):
147- """Return JSON-LD blocks found in HTML <script type='application/ld+json'>."""
162+ """
163+ Return JSON-LD blocks found in HTML <script type='application/ld+json'>.
164+ """
148165 blocks = re .findall (
149166 r'<script[^>]+type=[\'"]application/ld\+json[\'"][^>]*>(.*?)</script>' ,
150167 html_text ,
@@ -154,7 +171,9 @@ def _extract_jsonld_from_html(html_text):
154171
155172
156173def _is_machine_actionable (page_text , content_type = None ):
157- """Try to validate JSON, JSON-LD, or RDF with rdflib."""
174+ """
175+ Try to validate JSON, JSON-LD, or RDF with rdflib.
176+ """
158177 try :
159178 _ = json .loads (page_text )
160179 return True , "json"
@@ -196,7 +215,9 @@ def _is_machine_actionable(page_text, content_type=None):
196215
197216
198217def _prov_present_as_standard (graph_or_text ):
199- """Return True if PROV-O predicates are present."""
218+ """
219+ Return True if PROV-O predicates are present.
220+ """
200221 if Graph is not None and hasattr (graph_or_text , "triples" ):
201222 for p in graph_or_text .predicates (None , None ):
202223 if str (p ).startswith (PROV_NS ):
@@ -231,7 +252,9 @@ def _prov_present_as_standard(graph_or_text):
231252
232253
233254def _filter_non_prov_fields (fields ):
234- """Filter out provenance fields ('provenance' and 'prov_*')."""
255+ """
256+ Filter out provenance fields ('provenance' and 'prov_*').
257+ """
235258 return {f for f in fields if not f .startswith ("prov_" ) and f not in {"provenance" }}
236259
237260
@@ -242,7 +265,8 @@ def _filter_non_prov_fields(fields):
242265
243266
244267class Plugin (EvaluatorBase ):
245- """FAIR EVA plugin for AI4EOSC models with provenance triples.
268+ """
269+ FAIR EVA plugin for AI4EOSC models with provenance triples.
246270
247271 This plugin captures provenance triples to enrich interoperability and provenance
248272 indicators.
@@ -256,7 +280,9 @@ def __init__(
256280 config = None ,
257281 ** kwargs ,
258282 ) -> None :
259- """Initialize plugin and load/flatten metadata and provenance graph."""
283+ """
284+ Initialize plugin and load/flatten metadata and provenance graph.
285+ """
260286 self .name = "ai4os"
261287 self .config = config
262288 self .lang = lang
@@ -316,7 +342,9 @@ def _flatten_yaml(
316342 parent_key : str = "" ,
317343 metadata : Optional [List [List [Optional [str ]]]] = None ,
318344 ) -> List [List [Optional [str ]]]:
319- """Flatten nested YAML/JSON into [schema, element, value, qualifier] rows."""
345+ """
346+ Flatten nested YAML/JSON into [schema, element, value, qualifier] rows.
347+ """
320348 if metadata is None :
321349 metadata = []
322350 if isinstance (data , dict ):
@@ -336,15 +364,18 @@ def _flatten_yaml(
336364 return metadata
337365
338366 def _slug_from_item_id (self , item_id : str ) -> str :
339- """Turn a URL-like item_id into the repo slug; otherwise return the id."""
367+ """
368+ Turn a URL-like item_id into the repo slug; otherwise return the id.
369+ """
340370 if re .match (r"https?://" , item_id ):
341371 parts = item_id .rstrip ("/" ).split ("/" )
342372 return parts [- 1 ]
343373 return item_id
344374
345375 @lru_cache (maxsize = 1 )
346376 def _spdx_license_ids (self , include_deprecated = True ):
347- """Return a set of SPDX licenseId values (optionally including deprecated).
377+ """
378+ Return a set of SPDX licenseId values (optionally including deprecated).
348379
349380 On network error, return a minimal fallback set.
350381 """
@@ -373,7 +404,8 @@ def _spdx_license_ids(self, include_deprecated=True):
373404 return frozenset (fallback )
374405
375406 def _normalize_license_candidate (self , val : str ) -> str :
376- """Normalize potential license values to licenseId-like tokens.
407+ """
408+ Normalize potential license values to licenseId-like tokens.
377409
378410 - If it is an SPDX URL (or raw in markdown), take the last path segment.
379411 - Strip typical prefixes like 'SPDX:' or 'LicenseRef-'.
@@ -391,7 +423,9 @@ def _normalize_license_candidate(self, val: str) -> str:
391423 return v
392424
393425 def get_metadata (self ) -> Tuple [List [List [Optional [str ]]], Optional [Graph ]]:
394- """Load module metadata (yaml/json) and provenance graph (JSON‑LD)."""
426+ """
427+ Load module metadata (yaml/json) and provenance graph (JSON‑LD).
428+ """
395429 namespace = "{https://ai4os.eu/metadata}"
396430 metadata_list : List [List [Optional [str ]]] = []
397431 provenance_graph : Optional [Graph ] = None
@@ -484,7 +518,9 @@ def get_metadata(self) -> Tuple[List[List[Optional[str]]], Optional[Graph]]:
484518 return metadata_list , provenance_graph
485519
486520 def rda_a1_03d (self ):
487- """Check downloadable data via GitHub or archive link."""
521+ """
522+ Check downloadable data via GitHub or archive link.
523+ """
488524 has_repo , repo_url = _has_github_repo (self .metadata )
489525 if has_repo :
490526 msg = f"Repositorio encontrado y descargable vía HTTP/HTTPS: { repo_url } "
@@ -506,7 +542,9 @@ def rda_a1_03d(self):
506542 ]
507543
508544 def rda_a1_04m (self ):
509- """Use of standardized protocol (HTTP/HTTPS) for metadata."""
545+ """
546+ Use of standardized protocol (HTTP/HTTPS) for metadata.
547+ """
510548 urls = _collect_urls_from_metadata (self .metadata )
511549 if _any_url_uses_http (urls ):
512550 return 100 , [
@@ -574,7 +612,9 @@ def rda_a1_05d(self):
574612 return 100 , [{"message" : msg_ok , "points" : 100 }]
575613
576614 def rda_a1_1_01m (self ):
577- """Use of open/free protocol (A1.1) for metadata."""
615+ """
616+ Use of open/free protocol (A1.1) for metadata.
617+ """
578618 urls = _collect_urls_from_metadata (self .metadata )
579619 if _any_url_uses_http (urls ):
580620 return 100 , [
@@ -692,7 +732,9 @@ def _is_prov(element: str) -> bool:
692732 return points , [{"message" : msg , "points" : points }]
693733
694734 def rda_a1_03m (self ):
695- """Alias to rda_a1_02m (same check for a superset of fields)."""
735+ """
736+ Alias to rda_a1_02m (same check for a superset of fields).
737+ """
696738 return self .rda_a1_02m ()
697739
698740 def rda_a2_01m (self ):
@@ -740,7 +782,9 @@ def rda_i3_02d(self):
740782 ]
741783
742784 def _is_persistent_identifier (self , value : str ) -> bool :
743- """Heuristic check for PID patterns (DOI/Handle/ARK/PURL/W3ID/URN/ORCID)."""
785+ """
786+ Heuristic check for PID patterns (DOI/Handle/ARK/PURL/W3ID/URN/ORCID).
787+ """
744788 if not isinstance (value , str ) or len (value ) < 6 :
745789 return False
746790 v = value .strip ().lower ()
@@ -786,7 +830,9 @@ def rda_i3_04m(self):
786830
787831 @ConfigTerms (term_id = "terms_license" )
788832 def rda_r1_1_02m (self , license_list = [], machine_readable = False , ** kwargs ):
789- """Indicator R1.1-02M: metadata refers to a standard reuse license (SPDX)."""
833+ """
834+ Indicator R1.1-02M: metadata refers to a standard reuse license (SPDX).
835+ """
790836 points = 0
791837
792838 terms_license = kwargs ["terms_license" ]
@@ -833,7 +879,8 @@ def rda_r1_1_03m(
833879 spdx_local_path : str = None ,
834880 ** kwargs ,
835881 ):
836- """Indicator R1.1-03M: metadata refers to a machine‑understandable license.
882+ """
883+ Indicator R1.1-03M: metadata refers to a machine‑understandable license.
837884
838885 Consider it machine‑understandable if the license maps to an SPDX entry with
839886 a `detailsUrl` (the JSON endpoint). Accept inputs as licenseId, canonical
@@ -905,7 +952,9 @@ def rda_r1_1_03m(
905952 return (points , [{"message" : msg , "points" : points }])
906953
907954 def rda_r1_3_01m (self ):
908- """Indicator RDA-R1.3-01M: metadata meets community standards."""
955+ """
956+ Indicator RDA-R1.3-01M: metadata meets community standards.
957+ """
909958 return 100 , [
910959 {
911960 "message" : "Provided in common, machine-understandable formats (no single community standard defined)." ,
@@ -914,7 +963,9 @@ def rda_r1_3_01m(self):
914963 ]
915964
916965 def rda_r1_3_01d (self ):
917- """Indicator RDA-R1.3-01D: dataset meets community standards."""
966+ """
967+ Indicator RDA-R1.3-01D: dataset meets community standards.
968+ """
918969 return 100 , [
919970 {
920971 "message" : "Dataset provided in common, machine-understandable formats (no single community standard defined)." ,
@@ -923,7 +974,9 @@ def rda_r1_3_01d(self):
923974 ]
924975
925976 def rda_r1_3_02m (self ):
926- """Indicator RDA-R1.3-02M: metadata uses appropriate vocabularies/standards."""
977+ """
978+ Indicator RDA-R1.3-02M: metadata uses appropriate vocabularies/standards.
979+ """
927980 return 100 , [
928981 {
929982 "message" : "Metadata expressed in common, machine-understandable formats; community standard not uniquely defined." ,
@@ -932,7 +985,9 @@ def rda_r1_3_02m(self):
932985 ]
933986
934987 def rda_r1_3_02d (self ):
935- """Indicator RDA-R1.3-02D: data uses appropriate vocabularies/standards."""
988+ """
989+ Indicator RDA-R1.3-02D: data uses appropriate vocabularies/standards.
990+ """
936991 return 100 , [
937992 {
938993 "message" : "Data provided in common, machine-understandable formats; community standard not uniquely defined." ,
@@ -941,7 +996,9 @@ def rda_r1_3_02d(self):
941996 ]
942997
943998 def rda_i1_02d (self ):
944- """Check dataset URLs for machine‑actionable representations."""
999+ """
1000+ Check dataset URLs for machine‑actionable representations.
1001+ """
9451002 urls = [
9461003 v
9471004 for v in _collect_urls_from_metadata (self .metadata )
@@ -968,7 +1025,9 @@ def rda_i1_02d(self):
9681025 ]
9691026
9701027 def rda_r1_2_01m (self ):
971- """Indicator R1.2-01M: metadata includes provenance information."""
1028+ """
1029+ Indicator R1.2-01M: metadata includes provenance information.
1030+ """
9721031 if self .provenance_graph and Graph is not None :
9731032 points = 100
9741033 msg = [
0 commit comments