Skip to content

Commit 2e788b7

Browse files
committed
Merge branch 'main' into merge/main-upstream
2 parents 7f2e869 + ebfadea commit 2e788b7

File tree

10 files changed

+324
-139
lines changed

10 files changed

+324
-139
lines changed

.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ jobs:
1111
steps:
1212
- uses: actions/checkout@v3
1313
- uses: actions/setup-python@v3
14-
- uses: pre-commit/[email protected].0
14+
- uses: pre-commit/[email protected].1

api/rda.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from connexion import NoContent
1010

1111
import api.utils as ut
12-
from api.evaluator import Evaluator
12+
from api import evaluator
1313
from fair import app_dirname, load_config
1414

1515
logging.basicConfig(
@@ -39,10 +39,12 @@ def wrapper(body, **kwargs):
3939
# Get the identifiers through a search query
4040
ids = [item_id]
4141
# FIXME oai-pmh should be no different
42+
downstream_logger = evaluator.logger
4243
if repo not in ["oai-pmh"]:
4344
try:
4445
logger.debug("Trying to import plugin from plugins.%s.plugin" % (repo))
4546
plugin = importlib.import_module("plugins.%s.plugin" % (repo), ".")
47+
downstream_logger = plugin.logger
4648
except Exception as e:
4749
logger.error(str(e))
4850
return str(e), 400
@@ -55,20 +57,31 @@ def wrapper(body, **kwargs):
5557
logger.error(str(e))
5658
return str(e), 400
5759

60+
# Set handler for evaluator logs
61+
evaluator_handler = ut.EvaluatorLogHandler()
62+
downstream_logger.addHandler(evaluator_handler)
63+
5864
# Collect FAIR checks per metadata identifier
5965
result = {}
6066
exit_code = 200
6167
for item_id in ids:
6268
# FIXME oai-pmh should be no different
6369
if repo in ["oai-pmh"]:
64-
eva = Evaluator(item_id, oai_base, lang)
70+
eva = evaluator.Evaluator(item_id, oai_base, lang)
6571
else:
6672
eva = plugin.Plugin(item_id, oai_base, lang)
6773
_result, _exit_code = wrapped_func(body, eva=eva)
74+
logger.debug(
75+
"Raw result returned for indicator ID '%s': %s" % (item_id, _result)
76+
)
6877
result[item_id] = _result
6978
if _exit_code != 200:
7079
exit_code = _exit_code
7180

81+
# Append evaluator logs to the final results
82+
result["evaluator_logs"] = evaluator_handler.logs
83+
logger.debug("Evaluator logs appended through 'evaluator_logs' property")
84+
7285
return result, exit_code
7386

7487
return wrapper
@@ -106,7 +119,7 @@ def endpoints(plugin=None, plugins_path="plugins"):
106119
try:
107120
return enp[plugin]
108121
except:
109-
return "Input plugin not found"
122+
return (enp, 404)
110123
return enp
111124

112125

@@ -1537,30 +1550,6 @@ def rda_all(body, eva):
15371550
return result, 200
15381551

15391552

1540-
def endpoints(plugin=None):
1541-
plugins_list = ["epos", "gbif", "digital_csic", "dspace7", "signposting"]
1542-
plugins_with_endpoint = []
1543-
links = []
1544-
1545-
for plug in plugins_list:
1546-
try:
1547-
config = configparser.ConfigParser()
1548-
config.read("plugins/" + plug + "/config.ini")
1549-
links.append(config["Generic"]["endpoint"])
1550-
plugins_with_endpoint.append(plug)
1551-
except:
1552-
logging.debug("No endpoint found for " + plug)
1553-
# Create a dict with all the found endpoints
1554-
enp = dict(zip(plugins_with_endpoint, links))
1555-
# If the plugin is given then only returns a message
1556-
if plugin:
1557-
try:
1558-
return (enp[plugin], 200)
1559-
except:
1560-
return (plugins_with_endpoint, 404)
1561-
return (enp, 200)
1562-
1563-
15641553
def delete(id_):
15651554
id_ = int(id_)
15661555
return NoContent, 204

api/utils.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@
1515
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
1616

1717

18+
class EvaluatorLogHandler(logging.Handler):
    """Logging handler that keeps every received record in memory.

    Each record is stored in ``self.logs`` as the string
    ``"[<LEVELNAME>] <message>"`` so the collected messages can be read
    back after the logged operation has finished.

    Parameters
    ----------
    level : int
        Minimum severity a record must have to be collected
        (defaults to ``logging.DEBUG``).
    """

    def __init__(self, level=logging.DEBUG):
        # Initialise the base class so the handler gets its filters, lock
        # and level set up; skipping this leaves a half-built Handler.
        super().__init__(level)
        self.logs = []

    def emit(self, record):
        """Append the formatted record to ``self.logs``.

        ``emit`` (rather than ``handle``) is overridden so that the base
        class keeps applying handler filters and its I/O lock.
        ``record.getMessage()`` merges ``record.args`` into the message, so
        lazily formatted calls such as ``logger.info("x=%s", x)`` are stored
        with their final text instead of the raw template.
        """
        self.logs.append("[%s] %s" % (record.levelname, record.getMessage()))
25+
26+
1827
def get_doi_str(doi_str):
1928
doi_to_check = re.findall(
2029
r"10[\.-]+.[\d\.-]+/[\w\.-]+[\w\.-]+/[\w\.-]+[\w\.-]", doi_str
@@ -834,8 +843,10 @@ def resolve_handle(handle_id):
834843
835844
Returns:
836845
"""
837-
resolves = False
838-
endpoint = urljoin("https://hdl.handle.net/api/", "handles/%s" % handle_id)
846+
handle_id_normalized = idutils.normalize_doi(handle_id)
847+
endpoint = urljoin(
848+
"https://hdl.handle.net/api/", "handles/%s" % handle_id_normalized
849+
)
839850
headers = {"Content-Type": "application/json"}
840851
r = requests.get(endpoint, verify=False, headers=headers)
841852
if not r.ok:
@@ -844,9 +855,10 @@ def resolve_handle(handle_id):
844855
r.status_code,
845856
)
846857
raise Exception(msg)
847-
848858
json_data = r.json()
849859
response_code = json_data.get("responseCode", -1)
860+
861+
resolves = False
850862
if response_code == 1:
851863
resolves = True
852864
msg = "Handle and associated values found (HTTP 200 OK)"

docs/How to use the epos plugin.md

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,19 @@ To make sure its the one you are looking for you can make a curl to the API with
5656
Now take a look at terminal 1, it will display a table with important findability-related terms, one of them is the title, so you can make sure the item is the one that you want,
5757
(If the table displays a lot of ... items try to make the window wider and retry the test)
5858

59+
#### 2. Use the `--search` optional argument from the fair-eva.py script
5960

60-
#### 2. Connecting directly to the EPOS API
61+
A simple way to get the UUID is to use the search option to connect to the EPOS API. In terminal 2, just run the command:
6162

62-
You can perform a curl to the EPOS API to get your UUID. Yhe process is the same as before
63+
```
64+
(terminal #2) python3 scripts/fair-eva.py --search SVO --plugin epos -j
65+
```
66+
67+
Then you will select an index and the evaluation will be performed directly.
68+
69+
#### 3. Connecting directly to the EPOS API
70+
71+
You can perform a curl to the EPOS API to get your UUID. The process is the same as before
6372
```
6473
curl -X 'GET' \
6574
'https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1/resources/search?q=SVO' \
@@ -124,18 +133,13 @@ python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin e
124133
This command will return the evaluation of the RDA-F1-01M indicator.
125134

126135
### Scores
127-
To get a clear view of the scores the CLI has 2 extra parameters that print the punctuation of the item in the distict catergories.
136+
To get a clear view of the scores, the CLI has an extra parameter that prints the score of the item in the distinct categories.
128137

129-
You can add -s to get the points in each of the FAIR catergories and the total score.
130-
```
131-
(terminal #2) python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin epos --repository https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1 -s
132-
```
133-
Or you can add -fs to get the points in each of the different checks
134-
the total score.
138+
You can add --totals to get the points in each of the FAIR categories and the total score.
135139
```
136-
(terminal #2) python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin epos --repository https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1 -fs
140+
(terminal #2) python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin epos --repository https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1 --totals
137141
```
138-
You can also use them both together. Note that the points are not the basic average of the tests, because each test has a different weight.
142+
Note that the points are the weighted average of the tests, because each test has a different weight.
139143

140144
### Configuration through config.ini.
141145
There are some tests whose results depend on things outside of the metadata given by the EPOS API, so their result depends on a configuration parameter. These parameters are stored in the file 'config.ini'; you can change them to change some results. WARNING: a lot of parameters are essential for the tool to work. If the parameter you are interested in changing doesn't appear on the following list, you probably shouldn't change it:

plugins/epos/config.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ metadata_authentication = []
151151
#terms that use vocabularies and vocabularies used
152152
dict_vocabularies= {'ROR': 'https://ror.org/', 'PIC': 'https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/how-to-participate/participant-register', 'imtypes': 'https://www.iana.org/assignments/media-types/media-types.xhtml', 'TRL': 'TRL', 'temporal': 'https://www.iso.org/iso-8601-date-and-time-format.html', 'Rolecode': 'Rolecode', 'spdx': 'https://spdx.org/licenses/', 'ORCID': 'https://orcid.org/'}
153153

154-
terms_vocabularies=[['dataProvider','relatedDataProducts'],
154+
terms_vocabularies=[['identifiers','relatedDataProducts'],
155155
['',''],
156156
['availableFormats',''],
157157
['',''],

plugins/epos/plugin.py

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ def __init__(self, item_id, oai_base=None, lang="en", config=None):
6060
metadata_sample,
6161
columns=["metadata_schema", "element", "text_value", "qualifier"],
6262
)
63-
logger.debug("METADATA: %s" % (self.metadata))
63+
logger.debug(
64+
"Obtained metadata from repository: %s" % (self.metadata.to_json())
65+
)
6466
# Protocol for (meta)data accessing
6567
if len(self.metadata) > 0:
6668
self.access_protocols = ["http"]
@@ -145,7 +147,9 @@ def get_metadata(self):
145147
metadata_sample = []
146148
eml_schema = "epos"
147149

148-
final_url = self.oai_base + "/resources/details/" + self.item_id
150+
final_url = (
151+
self.oai_base + "/resources/details/" + self.item_id + "?extended=true"
152+
)
149153

150154
error_in_metadata = False
151155
headers = {
@@ -1076,7 +1080,8 @@ def rda_a1_2_01d(self):
10761080
)
10771081
return points, msg
10781082

1079-
def rda_a2_01m(self):
1083+
@ConfigTerms(term_id="terms_access")
1084+
def rda_a2_01m(self, return_protocol=False, **kwargs):
10801085
"""Indicator RDA-A2-01M A2: Metadata should be accessible even when the data is no longer available.
10811086
The indicator intends to verify that information about a digital object is still available after
10821087
the object has been deleted or otherwise has been lost. If possible, the metadata that
@@ -1094,10 +1099,39 @@ def rda_a2_01m(self):
10941099
msg = _(
10951100
"Preservation policy depends on the authority where this Digital Object is stored"
10961101
)
1102+
10971103
if self.metadata_persistence:
10981104
if ut.check_link(self.metadata_persistence[0]):
10991105
points = 100
11001106
msg = "The preservation policy is: " + str(self.metadata_persistence[0])
1107+
return (points, [{"message": msg, "points": points}])
1108+
1109+
terms_access = kwargs["terms_access"]
1110+
terms_access_list = terms_access["list"]
1111+
terms_access_metadata = terms_access["metadata"]
1112+
1113+
_elements = [
1114+
"downloadURL",
1115+
]
1116+
1117+
url = terms_access_metadata.loc[
1118+
terms_access_metadata["element"] == "downloadURL", "text_value"
1119+
]
1120+
1121+
if len(url.values) == 0:
1122+
return (
1123+
points,
1124+
[
1125+
{
1126+
"message": "Could not check data access protocol or persistence policy: EPOS metadata element <downloadURL> not found",
1127+
"points": points,
1128+
}
1129+
],
1130+
)
1131+
else:
1132+
if not ut.check_link(url.values[0]):
1133+
points = 100
1134+
msg = "Metadata is available after the data is no longer available."
11011135

11021136
return (points, [{"message": msg, "points": points}])
11031137

@@ -1152,7 +1186,8 @@ def rda_i1_01m(self, **kwargs):
11521186
info = dict(zip(self.vocabularies, vocabularies_element_list))
11531187
for vocab in info.keys():
11541188
if vocab == "ROR":
1155-
for iden in info[vocab][0][0]["identifiers"]:
1189+
for iden in info[vocab][0]:
1190+
# return(0,'testing')
11561191
if iden["type"] == "ROR":
11571192
exists, name = ut.check_ror(iden["value"])
11581193
if exists:
@@ -1291,6 +1326,13 @@ def rda_i1_02m(self, **kwargs):
12911326
"The metadata standard in use provides a machine-understandable knowledge expression: %s"
12921327
% self.metadata_standard
12931328
)
1329+
logger.info(msg)
1330+
else:
1331+
msg = (
1332+
"The metadata standard in use does not provide a machine-understandable knowledge expression: %s"
1333+
% self.metadata_standard
1334+
)
1335+
logger.warning(msg)
12941336

12951337
return (points, [{"message": msg, "points": points}])
12961338

@@ -1675,8 +1717,6 @@ def rda_r1_1_03m(self, **kwargs):
16751717
msg
16761718
Message with the results or recommendations to improve this indicator
16771719
"""
1678-
msg_list = []
1679-
16801720
terms_license = kwargs["terms_license"]
16811721
terms_license_metadata = terms_license["metadata"]
16821722

@@ -1691,13 +1731,12 @@ def rda_r1_1_03m(self, **kwargs):
16911731
if _points_license == 100:
16921732
_msg = "License/s are machine readable according to SPDX"
16931733
elif _points_license == 0:
1694-
_msg = "License/s arenot machine readable according to SPDX"
1734+
_msg = "License/s are not machine readable according to SPDX"
16951735
else:
16961736
_msg = "A subset of the license/s are machine readable according to SPDX"
16971737
logger.info(_msg)
1698-
msg_list.append({"message": _msg, "points": _points_license})
16991738

1700-
return (_points_license, [{"message": msg_list, "points": _points_license}])
1739+
return (_points_license, [{"message": _msg, "points": _points_license}])
17011740

17021741
@ConfigTerms(term_id="terms_provenance")
17031742
def rda_r1_2_01m(self, **kwargs):
@@ -1901,11 +1940,13 @@ def rda_r1_3_02d(self, **kwargs):
19011940
- 100/100 if the data format is machine understandable
19021941
- 0/100 otherwise
19031942
"""
1904-
msg = "No data standard found"
1943+
msg = ""
19051944
points = 0
19061945

1907-
points, msg = self.rda_r1_3_01d()
1946+
points, _msg = self.rda_r1_3_01d()
19081947
if points == 100:
19091948
msg = "Your data standard is expressed in compliance with a machine-understandable community standard"
1949+
else:
1950+
msg = "No data standard found"
19101951

19111952
return (points, [{"message": msg, "points": points}])

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ classifiers = [
1616
]
1717

1818
dependencies = [
19-
"werkzeug == 2.3.8",
19+
"werkzeug == 3.0.3",
2020
"connexion",
2121
"swagger-ui-bundle == 0.0.6",
2222
"flask == 2.2.5",

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# connexion requires werkzeug but connexion < 2.4.0 does not install werkzeug
55
# we must peg werkzeug versions below to fix connexion
66
# https://github.com/zalando/connexion/pull/1044
7-
werkzeug == 2.3.8
7+
werkzeug == 3.0.3
88
connexion[flask,uvicorn]
99
#swagger-ui-bundle == 0.0.6
1010
swagger-ui-bundle == 0.0.6
@@ -22,3 +22,4 @@ PyPDF2
2222
rdflib
2323
dicttoxml
2424
prettytable
25+
pyarrow

0 commit comments

Comments
 (0)