1
1
#!/usr/bin/python
2
2
# -*- coding: utf-8 -*-
3
- """Plugin to evaluate AI4EOSC models for FAIR EVA, enhanced with detailed provenance
3
+ """
4
+ Plugin to evaluate AI4EOSC models for FAIR EVA, enhanced with detailed provenance
4
5
metadata.
5
6
6
7
This plugin fetches metadata and provenance RDF for AI4EOSC models and flattens both
45
46
46
47
47
48
def _any_url_uses_http (urls ):
48
- """Return True if any URL in the iterable uses http/https."""
49
+ """
50
+ Return True if any URL in the iterable uses http/https.
51
+ """
49
52
for u in urls :
50
53
try :
51
54
if urlparse (str (u )).scheme in HTTP_OK_SCHEMES :
@@ -56,20 +59,25 @@ def _any_url_uses_http(urls):
56
59
57
60
58
61
def _normalize (s : str ) -> str :
59
- """Normalize a string by stripping and lowering."""
62
+ """
63
+ Normalize a string by stripping and lowering.
64
+ """
60
65
return (s or "" ).strip ().lower ()
61
66
62
67
63
68
def _strip_spdx_suffix (u : str ) -> str :
64
- """Strip common suffixes (.html/.json) from SPDX URLs."""
69
+ """
70
+ Strip common suffixes (.html/.json) from SPDX URLs.
71
+ """
65
72
u = u .strip ()
66
73
return re .sub (r"\.(html|json)$" , "" , u , flags = re .IGNORECASE )
67
74
68
75
69
76
def _build_spdx_indexes (
70
77
spdx_obj : Dict ,
71
78
) -> Tuple [Dict [str , str ], Dict [str , str ], Dict [str , str ]]:
72
- """Build three indexes to resolve user inputs to SPDX detailsUrl.
79
+ """
80
+ Build three indexes to resolve user inputs to SPDX detailsUrl.
73
81
74
82
- by licenseId
75
83
- by reference (canonical HTML)
@@ -94,7 +102,8 @@ def _build_spdx_indexes(
94
102
95
103
96
104
def _load_spdx_licenses (spdx_licenses_json = None , spdx_path : str = None ) -> Dict :
97
- """Load the SPDX License List JSON object.
105
+ """
106
+ Load the SPDX License List JSON object.
98
107
99
108
You can:
100
109
- pass 'spdx_licenses_json' already parsed (dict),
@@ -112,7 +121,9 @@ def _load_spdx_licenses(spdx_licenses_json=None, spdx_path: str = None) -> Dict:
112
121
113
122
114
123
def _collect_urls_from_metadata (df , fields_like = None ):
115
- """Extract URLs from self.metadata rows (element/text_value/qualifier)."""
124
+ """
125
+ Extract URLs from self.metadata rows (element/text_value/qualifier).
126
+ """
116
127
urls = []
117
128
if df is None or len (df ) == 0 :
118
129
return urls
@@ -128,23 +139,29 @@ def _collect_urls_from_metadata(df, fields_like=None):
128
139
129
140
130
141
def _has_github_repo(df):
    """
    Check if any collected URL looks like a GitHub repo.

    Args:
        df: Metadata table handed to ``_collect_urls_from_metadata``.

    Returns:
        ``(True, url)`` for the first collected URL in which ``GITHUB_RE``
        finds a match, otherwise ``(False, None)``.
    """
    for u in _collect_urls_from_metadata(df):
        # `search` (not `match`): the pattern may occur anywhere in the URL.
        if GITHUB_RE.search(u):
            return True, u
    return False, None
136
149
137
150
138
151
def _fetch(url, timeout=15, session=None):
    """
    Fetch a URL with optional provided session.

    Args:
        url: Address to request with HTTP GET (redirects are followed).
        timeout: Timeout passed to the request.
        session: Optional session object exposing ``get``; when given it is
            used as-is and left open for the caller to manage.

    Returns:
        The response object returned by ``get``.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for 4xx/5xx
            responses.
    """
    if session:
        r = session.get(url, timeout=timeout, allow_redirects=True)
    else:
        # No session supplied: use a temporary one and close it afterwards so
        # its connection pool is not leaked. The request is not streamed, so
        # the body is already downloaded when the session closes.
        with requests.Session() as s:
            r = s.get(url, timeout=timeout, allow_redirects=True)
    r.raise_for_status()
    return r
144
159
145
160
146
161
def _extract_jsonld_from_html (html_text ):
147
- """Return JSON-LD blocks found in HTML <script type='application/ld+json'>."""
162
+ """
163
+ Return JSON-LD blocks found in HTML <script type='application/ld+json'>.
164
+ """
148
165
blocks = re .findall (
149
166
r'<script[^>]+type=[\'"]application/ld\+json[\'"][^>]*>(.*?)</script>' ,
150
167
html_text ,
@@ -154,7 +171,9 @@ def _extract_jsonld_from_html(html_text):
154
171
155
172
156
173
def _is_machine_actionable (page_text , content_type = None ):
157
- """Try to validate JSON, JSON-LD, or RDF with rdflib."""
174
+ """
175
+ Try to validate JSON, JSON-LD, or RDF with rdflib.
176
+ """
158
177
try :
159
178
_ = json .loads (page_text )
160
179
return True , "json"
@@ -196,7 +215,9 @@ def _is_machine_actionable(page_text, content_type=None):
196
215
197
216
198
217
def _prov_present_as_standard (graph_or_text ):
199
- """Return True if PROV-O predicates are present."""
218
+ """
219
+ Return True if PROV-O predicates are present.
220
+ """
200
221
if Graph is not None and hasattr (graph_or_text , "triples" ):
201
222
for p in graph_or_text .predicates (None , None ):
202
223
if str (p ).startswith (PROV_NS ):
@@ -231,7 +252,9 @@ def _prov_present_as_standard(graph_or_text):
231
252
232
253
233
254
def _filter_non_prov_fields (fields ):
234
- """Filter out provenance fields ('provenance' and 'prov_*')."""
255
+ """
256
+ Filter out provenance fields ('provenance' and 'prov_*').
257
+ """
235
258
return {f for f in fields if not f .startswith ("prov_" ) and f not in {"provenance" }}
236
259
237
260
@@ -242,7 +265,8 @@ def _filter_non_prov_fields(fields):
242
265
243
266
244
267
class Plugin (EvaluatorBase ):
245
- """FAIR EVA plugin for AI4EOSC models with provenance triples.
268
+ """
269
+ FAIR EVA plugin for AI4EOSC models with provenance triples.
246
270
247
271
This plugin captures provenance triples to enrich interoperability and provenance
248
272
indicators.
@@ -256,7 +280,9 @@ def __init__(
256
280
config = None ,
257
281
** kwargs ,
258
282
) -> None :
259
- """Initialize plugin and load/flatten metadata and provenance graph."""
283
+ """
284
+ Initialize plugin and load/flatten metadata and provenance graph.
285
+ """
260
286
self .name = "ai4os"
261
287
self .config = config
262
288
self .lang = lang
@@ -316,7 +342,9 @@ def _flatten_yaml(
316
342
parent_key : str = "" ,
317
343
metadata : Optional [List [List [Optional [str ]]]] = None ,
318
344
) -> List [List [Optional [str ]]]:
319
- """Flatten nested YAML/JSON into [schema, element, value, qualifier] rows."""
345
+ """
346
+ Flatten nested YAML/JSON into [schema, element, value, qualifier] rows.
347
+ """
320
348
if metadata is None :
321
349
metadata = []
322
350
if isinstance (data , dict ):
@@ -336,15 +364,18 @@ def _flatten_yaml(
336
364
return metadata
337
365
338
366
def _slug_from_item_id (self , item_id : str ) -> str :
339
- """Turn a URL-like item_id into the repo slug; otherwise return the id."""
367
+ """
368
+ Turn a URL-like item_id into the repo slug; otherwise return the id.
369
+ """
340
370
if re .match (r"https?://" , item_id ):
341
371
parts = item_id .rstrip ("/" ).split ("/" )
342
372
return parts [- 1 ]
343
373
return item_id
344
374
345
375
@lru_cache (maxsize = 1 )
346
376
def _spdx_license_ids (self , include_deprecated = True ):
347
- """Return a set of SPDX licenseId values (optionally including deprecated).
377
+ """
378
+ Return a set of SPDX licenseId values (optionally including deprecated).
348
379
349
380
On network error, return a minimal fallback set.
350
381
"""
@@ -373,7 +404,8 @@ def _spdx_license_ids(self, include_deprecated=True):
373
404
return frozenset (fallback )
374
405
375
406
def _normalize_license_candidate (self , val : str ) -> str :
376
- """Normalize potential license values to licenseId-like tokens.
407
+ """
408
+ Normalize potential license values to licenseId-like tokens.
377
409
378
410
- If it is an SPDX URL (or raw in markdown), take the last path segment.
379
411
- Strip typical prefixes like 'SPDX:' or 'LicenseRef-'.
@@ -391,7 +423,9 @@ def _normalize_license_candidate(self, val: str) -> str:
391
423
return v
392
424
393
425
def get_metadata (self ) -> Tuple [List [List [Optional [str ]]], Optional [Graph ]]:
394
- """Load module metadata (yaml/json) and provenance graph (JSON‑LD)."""
426
+ """
427
+ Load module metadata (yaml/json) and provenance graph (JSON‑LD).
428
+ """
395
429
namespace = "{https://ai4os.eu/metadata}"
396
430
metadata_list : List [List [Optional [str ]]] = []
397
431
provenance_graph : Optional [Graph ] = None
@@ -484,7 +518,9 @@ def get_metadata(self) -> Tuple[List[List[Optional[str]]], Optional[Graph]]:
484
518
return metadata_list , provenance_graph
485
519
486
520
def rda_a1_03d (self ):
487
- """Check downloadable data via GitHub or archive link."""
521
+ """
522
+ Check downloadable data via GitHub or archive link.
523
+ """
488
524
has_repo , repo_url = _has_github_repo (self .metadata )
489
525
if has_repo :
490
526
msg = f"Repositorio encontrado y descargable vía HTTP/HTTPS: { repo_url } "
@@ -506,7 +542,9 @@ def rda_a1_03d(self):
506
542
]
507
543
508
544
def rda_a1_04m (self ):
509
- """Use of standardized protocol (HTTP/HTTPS) for metadata."""
545
+ """
546
+ Use of standardized protocol (HTTP/HTTPS) for metadata.
547
+ """
510
548
urls = _collect_urls_from_metadata (self .metadata )
511
549
if _any_url_uses_http (urls ):
512
550
return 100 , [
@@ -574,7 +612,9 @@ def rda_a1_05d(self):
574
612
return 100 , [{"message" : msg_ok , "points" : 100 }]
575
613
576
614
def rda_a1_1_01m (self ):
577
- """Use of open/free protocol (A1.1) for metadata."""
615
+ """
616
+ Use of open/free protocol (A1.1) for metadata.
617
+ """
578
618
urls = _collect_urls_from_metadata (self .metadata )
579
619
if _any_url_uses_http (urls ):
580
620
return 100 , [
@@ -692,7 +732,9 @@ def _is_prov(element: str) -> bool:
692
732
return points , [{"message" : msg , "points" : points }]
693
733
694
734
def rda_a1_03m(self):
    """
    Alias to rda_a1_02m (same check for a superset of fields).

    Returns:
        Whatever ``self.rda_a1_02m()`` returns, passed through unchanged.
    """
    return self.rda_a1_02m()
697
739
698
740
def rda_a2_01m (self ):
@@ -740,7 +782,9 @@ def rda_i3_02d(self):
740
782
]
741
783
742
784
def _is_persistent_identifier (self , value : str ) -> bool :
743
- """Heuristic check for PID patterns (DOI/Handle/ARK/PURL/W3ID/URN/ORCID)."""
785
+ """
786
+ Heuristic check for PID patterns (DOI/Handle/ARK/PURL/W3ID/URN/ORCID).
787
+ """
744
788
if not isinstance (value , str ) or len (value ) < 6 :
745
789
return False
746
790
v = value .strip ().lower ()
@@ -786,7 +830,9 @@ def rda_i3_04m(self):
786
830
787
831
@ConfigTerms (term_id = "terms_license" )
788
832
def rda_r1_1_02m (self , license_list = [], machine_readable = False , ** kwargs ):
789
- """Indicator R1.1-02M: metadata refers to a standard reuse license (SPDX)."""
833
+ """
834
+ Indicator R1.1-02M: metadata refers to a standard reuse license (SPDX).
835
+ """
790
836
points = 0
791
837
792
838
terms_license = kwargs ["terms_license" ]
@@ -833,7 +879,8 @@ def rda_r1_1_03m(
833
879
spdx_local_path : str = None ,
834
880
** kwargs ,
835
881
):
836
- """Indicator R1.1-03M: metadata refers to a machine‑understandable license.
882
+ """
883
+ Indicator R1.1-03M: metadata refers to a machine‑understandable license.
837
884
838
885
Consider it machine‑understandable if the license maps to an SPDX entry with
839
886
a `detailsUrl` (the JSON endpoint). Accept inputs as licenseId, canonical
@@ -905,7 +952,9 @@ def rda_r1_1_03m(
905
952
return (points , [{"message" : msg , "points" : points }])
906
953
907
954
def rda_r1_3_01m (self ):
908
- """Indicator RDA-R1.3-01M: metadata meets community standards."""
955
+ """
956
+ Indicator RDA-R1.3-01M: metadata meets community standards.
957
+ """
909
958
return 100 , [
910
959
{
911
960
"message" : "Provided in common, machine-understandable formats (no single community standard defined)." ,
@@ -914,7 +963,9 @@ def rda_r1_3_01m(self):
914
963
]
915
964
916
965
def rda_r1_3_01d (self ):
917
- """Indicator RDA-R1.3-01D: dataset meets community standards."""
966
+ """
967
+ Indicator RDA-R1.3-01D: dataset meets community standards.
968
+ """
918
969
return 100 , [
919
970
{
920
971
"message" : "Dataset provided in common, machine-understandable formats (no single community standard defined)." ,
@@ -923,7 +974,9 @@ def rda_r1_3_01d(self):
923
974
]
924
975
925
976
def rda_r1_3_02m (self ):
926
- """Indicator RDA-R1.3-02M: metadata uses appropriate vocabularies/standards."""
977
+ """
978
+ Indicator RDA-R1.3-02M: metadata uses appropriate vocabularies/standards.
979
+ """
927
980
return 100 , [
928
981
{
929
982
"message" : "Metadata expressed in common, machine-understandable formats; community standard not uniquely defined." ,
@@ -932,7 +985,9 @@ def rda_r1_3_02m(self):
932
985
]
933
986
934
987
def rda_r1_3_02d (self ):
935
- """Indicator RDA-R1.3-02D: data uses appropriate vocabularies/standards."""
988
+ """
989
+ Indicator RDA-R1.3-02D: data uses appropriate vocabularies/standards.
990
+ """
936
991
return 100 , [
937
992
{
938
993
"message" : "Data provided in common, machine-understandable formats; community standard not uniquely defined." ,
@@ -941,7 +996,9 @@ def rda_r1_3_02d(self):
941
996
]
942
997
943
998
def rda_i1_02d (self ):
944
- """Check dataset URLs for machine‑actionable representations."""
999
+ """
1000
+ Check dataset URLs for machine‑actionable representations.
1001
+ """
945
1002
urls = [
946
1003
v
947
1004
for v in _collect_urls_from_metadata (self .metadata )
@@ -968,7 +1025,9 @@ def rda_i1_02d(self):
968
1025
]
969
1026
970
1027
def rda_r1_2_01m (self ):
971
- """Indicator R1.2-01M: metadata includes provenance information."""
1028
+ """
1029
+ Indicator R1.2-01M: metadata includes provenance information.
1030
+ """
972
1031
if self .provenance_graph and Graph is not None :
973
1032
points = 100
974
1033
msg = [
0 commit comments