Skip to content

Commit a78a0cc

Browse files
committed
Solving issues
1 parent 8e0b335 commit a78a0cc

File tree

2 files changed

+102
-36
lines changed

2 files changed

+102
-36
lines changed

.pre-commit-config.yaml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,17 @@ repos:
1111
hooks:
1212
- id: black
1313
- repo: https://github.com/PyCQA/docformatter
14-
rev: master
14+
rev: v1.7.5
1515
hooks:
16-
- id: docformatter
17-
additional_dependencies: [tomli]
16+
- id: docformatter
17+
args:
18+
- --in-place
19+
- --wrap-summaries=88
20+
- --wrap-descriptions=88
21+
- --make-summary-multi-line
22+
- --pre-summary-newline
23+
additional_dependencies: [tomli]
24+
language_version: python3.12 # pinned to avoid the bug on Python 3.13
1825
- repo: https://github.com/pre-commit/mirrors-isort
1926
rev: v5.10.1
2027
hooks:

plugins/ai4os/plugin.py

Lines changed: 92 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/python
22
# -*- coding: utf-8 -*-
3-
"""Plugin to evaluate AI4EOSC models for FAIR EVA, enhanced with detailed provenance
3+
"""
4+
Plugin to evaluate AI4EOSC models for FAIR EVA, enhanced with detailed provenance
45
metadata.
56
67
This plugin fetches metadata and provenance RDF for AI4EOSC models and flattens both
@@ -45,7 +46,9 @@
4546

4647

4748
def _any_url_uses_http(urls):
48-
"""Return True if any URL in the iterable uses http/https."""
49+
"""
50+
Return True if any URL in the iterable uses http/https.
51+
"""
4952
for u in urls:
5053
try:
5154
if urlparse(str(u)).scheme in HTTP_OK_SCHEMES:
@@ -56,20 +59,25 @@ def _any_url_uses_http(urls):
5659

5760

5861
def _normalize(s: str) -> str:
59-
"""Normalize a string by stripping and lowering."""
62+
"""
63+
Normalize a string by stripping and lowering.
64+
"""
6065
return (s or "").strip().lower()
6166

6267

6368
def _strip_spdx_suffix(u: str) -> str:
64-
"""Strip common suffixes (.html/.json) from SPDX URLs."""
69+
"""
70+
Strip common suffixes (.html/.json) from SPDX URLs.
71+
"""
6572
u = u.strip()
6673
return re.sub(r"\.(html|json)$", "", u, flags=re.IGNORECASE)
6774

6875

6976
def _build_spdx_indexes(
7077
spdx_obj: Dict,
7178
) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
72-
"""Build three indexes to resolve user inputs to SPDX detailsUrl.
79+
"""
80+
Build three indexes to resolve user inputs to SPDX detailsUrl.
7381
7482
- by licenseId
7583
- by reference (canonical HTML)
@@ -94,7 +102,8 @@ def _build_spdx_indexes(
94102

95103

96104
def _load_spdx_licenses(spdx_licenses_json=None, spdx_path: str = None) -> Dict:
97-
"""Load the SPDX License List JSON object.
105+
"""
106+
Load the SPDX License List JSON object.
98107
99108
You can:
100109
- pass 'spdx_licenses_json' already parsed (dict),
@@ -112,7 +121,9 @@ def _load_spdx_licenses(spdx_licenses_json=None, spdx_path: str = None) -> Dict:
112121

113122

114123
def _collect_urls_from_metadata(df, fields_like=None):
115-
"""Extract URLs from self.metadata rows (element/text_value/qualifier)."""
124+
"""
125+
Extract URLs from self.metadata rows (element/text_value/qualifier).
126+
"""
116127
urls = []
117128
if df is None or len(df) == 0:
118129
return urls
@@ -128,23 +139,29 @@ def _collect_urls_from_metadata(df, fields_like=None):
128139

129140

130141
def _has_github_repo(df):
131-
"""Check if any collected URL looks like a GitHub repo."""
142+
"""
143+
Check if any collected URL looks like a GitHub repo.
144+
"""
132145
for u in _collect_urls_from_metadata(df):
133146
if GITHUB_RE.search(u):
134147
return True, u
135148
return False, None
136149

137150

138151
def _fetch(url, timeout=15, session=None):
139-
"""Fetch a URL with optional provided session."""
152+
"""
153+
Fetch a URL with optional provided session.
154+
"""
140155
s = session or requests.Session()
141156
r = s.get(url, timeout=timeout, allow_redirects=True)
142157
r.raise_for_status()
143158
return r
144159

145160

146161
def _extract_jsonld_from_html(html_text):
147-
"""Return JSON-LD blocks found in HTML <script type='application/ld+json'>."""
162+
"""
163+
Return JSON-LD blocks found in HTML <script type='application/ld+json'>.
164+
"""
148165
blocks = re.findall(
149166
r'<script[^>]+type=[\'"]application/ld\+json[\'"][^>]*>(.*?)</script>',
150167
html_text,
@@ -154,7 +171,9 @@ def _extract_jsonld_from_html(html_text):
154171

155172

156173
def _is_machine_actionable(page_text, content_type=None):
157-
"""Try to validate JSON, JSON-LD, or RDF with rdflib."""
174+
"""
175+
Try to validate JSON, JSON-LD, or RDF with rdflib.
176+
"""
158177
try:
159178
_ = json.loads(page_text)
160179
return True, "json"
@@ -196,7 +215,9 @@ def _is_machine_actionable(page_text, content_type=None):
196215

197216

198217
def _prov_present_as_standard(graph_or_text):
199-
"""Return True if PROV-O predicates are present."""
218+
"""
219+
Return True if PROV-O predicates are present.
220+
"""
200221
if Graph is not None and hasattr(graph_or_text, "triples"):
201222
for p in graph_or_text.predicates(None, None):
202223
if str(p).startswith(PROV_NS):
@@ -231,7 +252,9 @@ def _prov_present_as_standard(graph_or_text):
231252

232253

233254
def _filter_non_prov_fields(fields):
234-
"""Filter out provenance fields ('provenance' and 'prov_*')."""
255+
"""
256+
Filter out provenance fields ('provenance' and 'prov_*').
257+
"""
235258
return {f for f in fields if not f.startswith("prov_") and f not in {"provenance"}}
236259

237260

@@ -242,7 +265,8 @@ def _filter_non_prov_fields(fields):
242265

243266

244267
class Plugin(EvaluatorBase):
245-
"""FAIR EVA plugin for AI4EOSC models with provenance triples.
268+
"""
269+
FAIR EVA plugin for AI4EOSC models with provenance triples.
246270
247271
This plugin captures provenance triples to enrich interoperability and provenance
248272
indicators.
@@ -256,7 +280,9 @@ def __init__(
256280
config=None,
257281
**kwargs,
258282
) -> None:
259-
"""Initialize plugin and load/flatten metadata and provenance graph."""
283+
"""
284+
Initialize plugin and load/flatten metadata and provenance graph.
285+
"""
260286
self.name = "ai4os"
261287
self.config = config
262288
self.lang = lang
@@ -316,7 +342,9 @@ def _flatten_yaml(
316342
parent_key: str = "",
317343
metadata: Optional[List[List[Optional[str]]]] = None,
318344
) -> List[List[Optional[str]]]:
319-
"""Flatten nested YAML/JSON into [schema, element, value, qualifier] rows."""
345+
"""
346+
Flatten nested YAML/JSON into [schema, element, value, qualifier] rows.
347+
"""
320348
if metadata is None:
321349
metadata = []
322350
if isinstance(data, dict):
@@ -336,15 +364,18 @@ def _flatten_yaml(
336364
return metadata
337365

338366
def _slug_from_item_id(self, item_id: str) -> str:
339-
"""Turn a URL-like item_id into the repo slug; otherwise return the id."""
367+
"""
368+
Turn a URL-like item_id into the repo slug; otherwise return the id.
369+
"""
340370
if re.match(r"https?://", item_id):
341371
parts = item_id.rstrip("/").split("/")
342372
return parts[-1]
343373
return item_id
344374

345375
@lru_cache(maxsize=1)
346376
def _spdx_license_ids(self, include_deprecated=True):
347-
"""Return a set of SPDX licenseId values (optionally including deprecated).
377+
"""
378+
Return a set of SPDX licenseId values (optionally including deprecated).
348379
349380
On network error, return a minimal fallback set.
350381
"""
@@ -373,7 +404,8 @@ def _spdx_license_ids(self, include_deprecated=True):
373404
return frozenset(fallback)
374405

375406
def _normalize_license_candidate(self, val: str) -> str:
376-
"""Normalize potential license values to licenseId-like tokens.
407+
"""
408+
Normalize potential license values to licenseId-like tokens.
377409
378410
- If it is an SPDX URL (or raw in markdown), take the last path segment.
379411
- Strip typical prefixes like 'SPDX:' or 'LicenseRef-'.
@@ -391,7 +423,9 @@ def _normalize_license_candidate(self, val: str) -> str:
391423
return v
392424

393425
def get_metadata(self) -> Tuple[List[List[Optional[str]]], Optional[Graph]]:
394-
"""Load module metadata (yaml/json) and provenance graph (JSON‑LD)."""
426+
"""
427+
Load module metadata (yaml/json) and provenance graph (JSON‑LD).
428+
"""
395429
namespace = "{https://ai4os.eu/metadata}"
396430
metadata_list: List[List[Optional[str]]] = []
397431
provenance_graph: Optional[Graph] = None
@@ -484,7 +518,9 @@ def get_metadata(self) -> Tuple[List[List[Optional[str]]], Optional[Graph]]:
484518
return metadata_list, provenance_graph
485519

486520
def rda_a1_03d(self):
487-
"""Check downloadable data via GitHub or archive link."""
521+
"""
522+
Check downloadable data via GitHub or archive link.
523+
"""
488524
has_repo, repo_url = _has_github_repo(self.metadata)
489525
if has_repo:
490526
msg = f"Repositorio encontrado y descargable vía HTTP/HTTPS: {repo_url}"
@@ -506,7 +542,9 @@ def rda_a1_03d(self):
506542
]
507543

508544
def rda_a1_04m(self):
509-
"""Use of standardized protocol (HTTP/HTTPS) for metadata."""
545+
"""
546+
Use of standardized protocol (HTTP/HTTPS) for metadata.
547+
"""
510548
urls = _collect_urls_from_metadata(self.metadata)
511549
if _any_url_uses_http(urls):
512550
return 100, [
@@ -574,7 +612,9 @@ def rda_a1_05d(self):
574612
return 100, [{"message": msg_ok, "points": 100}]
575613

576614
def rda_a1_1_01m(self):
577-
"""Use of open/free protocol (A1.1) for metadata."""
615+
"""
616+
Use of open/free protocol (A1.1) for metadata.
617+
"""
578618
urls = _collect_urls_from_metadata(self.metadata)
579619
if _any_url_uses_http(urls):
580620
return 100, [
@@ -692,7 +732,9 @@ def _is_prov(element: str) -> bool:
692732
return points, [{"message": msg, "points": points}]
693733

694734
def rda_a1_03m(self):
695-
"""Alias to rda_a1_02m (same check for a superset of fields)."""
735+
"""
736+
Alias to rda_a1_02m (same check for a superset of fields).
737+
"""
696738
return self.rda_a1_02m()
697739

698740
def rda_a2_01m(self):
@@ -740,7 +782,9 @@ def rda_i3_02d(self):
740782
]
741783

742784
def _is_persistent_identifier(self, value: str) -> bool:
743-
"""Heuristic check for PID patterns (DOI/Handle/ARK/PURL/W3ID/URN/ORCID)."""
785+
"""
786+
Heuristic check for PID patterns (DOI/Handle/ARK/PURL/W3ID/URN/ORCID).
787+
"""
744788
if not isinstance(value, str) or len(value) < 6:
745789
return False
746790
v = value.strip().lower()
@@ -786,7 +830,9 @@ def rda_i3_04m(self):
786830

787831
@ConfigTerms(term_id="terms_license")
788832
def rda_r1_1_02m(self, license_list=[], machine_readable=False, **kwargs):
789-
"""Indicator R1.1-02M: metadata refers to a standard reuse license (SPDX)."""
833+
"""
834+
Indicator R1.1-02M: metadata refers to a standard reuse license (SPDX).
835+
"""
790836
points = 0
791837

792838
terms_license = kwargs["terms_license"]
@@ -833,7 +879,8 @@ def rda_r1_1_03m(
833879
spdx_local_path: str = None,
834880
**kwargs,
835881
):
836-
"""Indicator R1.1-03M: metadata refers to a machine‑understandable license.
882+
"""
883+
Indicator R1.1-03M: metadata refers to a machine‑understandable license.
837884
838885
Consider it machine‑understandable if the license maps to an SPDX entry with
839886
a `detailsUrl` (the JSON endpoint). Accept inputs as licenseId, canonical
@@ -905,7 +952,9 @@ def rda_r1_1_03m(
905952
return (points, [{"message": msg, "points": points}])
906953

907954
def rda_r1_3_01m(self):
908-
"""Indicator RDA-R1.3-01M: metadata meets community standards."""
955+
"""
956+
Indicator RDA-R1.3-01M: metadata meets community standards.
957+
"""
909958
return 100, [
910959
{
911960
"message": "Provided in common, machine-understandable formats (no single community standard defined).",
@@ -914,7 +963,9 @@ def rda_r1_3_01m(self):
914963
]
915964

916965
def rda_r1_3_01d(self):
917-
"""Indicator RDA-R1.3-01D: dataset meets community standards."""
966+
"""
967+
Indicator RDA-R1.3-01D: dataset meets community standards.
968+
"""
918969
return 100, [
919970
{
920971
"message": "Dataset provided in common, machine-understandable formats (no single community standard defined).",
@@ -923,7 +974,9 @@ def rda_r1_3_01d(self):
923974
]
924975

925976
def rda_r1_3_02m(self):
926-
"""Indicator RDA-R1.3-02M: metadata uses appropriate vocabularies/standards."""
977+
"""
978+
Indicator RDA-R1.3-02M: metadata uses appropriate vocabularies/standards.
979+
"""
927980
return 100, [
928981
{
929982
"message": "Metadata expressed in common, machine-understandable formats; community standard not uniquely defined.",
@@ -932,7 +985,9 @@ def rda_r1_3_02m(self):
932985
]
933986

934987
def rda_r1_3_02d(self):
935-
"""Indicator RDA-R1.3-02D: data uses appropriate vocabularies/standards."""
988+
"""
989+
Indicator RDA-R1.3-02D: data uses appropriate vocabularies/standards.
990+
"""
936991
return 100, [
937992
{
938993
"message": "Data provided in common, machine-understandable formats; community standard not uniquely defined.",
@@ -941,7 +996,9 @@ def rda_r1_3_02d(self):
941996
]
942997

943998
def rda_i1_02d(self):
944-
"""Check dataset URLs for machine‑actionable representations."""
999+
"""
1000+
Check dataset URLs for machine‑actionable representations.
1001+
"""
9451002
urls = [
9461003
v
9471004
for v in _collect_urls_from_metadata(self.metadata)
@@ -968,7 +1025,9 @@ def rda_i1_02d(self):
9681025
]
9691026

9701027
def rda_r1_2_01m(self):
971-
"""Indicator R1.2-01M: metadata includes provenance information."""
1028+
"""
1029+
Indicator R1.2-01M: metadata includes provenance information.
1030+
"""
9721031
if self.provenance_graph and Graph is not None:
9731032
points = 100
9741033
msg = [

0 commit comments

Comments
 (0)