Skip to content

Commit de9bcde

Browse files
committed
DIGITAL.CSIC plugin working
1 parent b940805 commit de9bcde

File tree

4 files changed

+297
-161
lines changed

4 files changed

+297
-161
lines changed

api/evaluator.py

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,7 @@ def rda_i1_01d(self):
11721172
return (points, msg_list)
11731173

11741174
def rda_i1_02m(self):
1175+
# TOFIX - This is very OAI-PMH dependent
11751176
"""Indicator RDA-A1-01M.
11761177
11771178
This indicator is linked to the following principle: I1: (Meta)data use a formal, accessible,
@@ -1460,56 +1461,58 @@ def rda_i3_04m(self):
14601461
return self.rda_i3_03m()
14611462

14621463
# REUSABLE
1463-
def rda_r1_01m(self):
1464-
"""Indicator RDA-A1-01M
1464+
@ConfigTerms(term_id="terms_reusability_richness")
def rda_r1_01m(self, **kwargs):
    """Indicator RDA-R1-01M: Plurality of accurate and relevant attributes are provided to allow reuse.

    This indicator is linked to the following principle: R1: (Meta)data are richly described with a
    plurality of accurate and relevant attributes.

    The indicator concerns the quantity but also the quality of metadata provided in order to
    enhance data reusability.

    Parameters
    ----------
    **kwargs
        Must contain ``terms_reusability_richness``, a mapping with two entries:
        ``"list"`` (the configured ``[element, qualifier]`` terms) and ``"metadata"``
        (a table exposing ``element``, ``qualifier`` and ``text_value`` columns).
        NOTE(review): presumably injected by the ``ConfigTerms`` decorator - confirm.

    Returns
    -------
    points
        Number of distinct (element, qualifier) groups present in the metadata,
        divided by the number of configured terms, times 100. ``0`` when no terms
        are configured.
    msg
        Single-item list ``[{"message": ..., "points": ...}]`` with the results or
        recommendations to improve this indicator.
    """
    terms_reusability_richness = kwargs["terms_reusability_richness"]
    expected_terms = terms_reusability_richness["list"]
    found_metadata = terms_reusability_richness["metadata"]

    # Collect the text values of every metadata row whose element matches a
    # configured term.
    # NOTE(review): matching here is on the element only (qualifier ignored), so a
    # term list naming the same element twice reports its values twice - confirm
    # whether that is intended.
    reusability_element_list = []
    for element in expected_terms:
        matching_values = found_metadata.loc[
            found_metadata["element"].isin([element[0]]),
            "text_value",
        ].values
        reusability_element_list.extend(matching_values)

    if len(reusability_element_list) > 0:
        msg = "Found %s metadata elements that enhance reusability: %s" % (
            len(reusability_element_list),
            reusability_element_list,
        )
    else:
        msg = "Could not find any metadata element that enhance reusability"

    if len(expected_terms) > 0:
        points = (
            len(found_metadata.groupby(["element", "qualifier"]).count())
            / len(expected_terms)
            * 100
        )
    else:
        # An empty configuration used to raise ZeroDivisionError; nothing can be
        # scored, so report zero instead.
        points = 0

    return (points, [{"message": msg, "points": points}])
15131516

15141517
@ConfigTerms(term_id="terms_license")
15151518
def rda_r1_1_01m(self, license_list=[], **kwargs):

api/utils.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,31 @@ def check_standard_project_relation(value):
651651
return False
652652

653653

654+
def oai_request(oai_base, action, timeout=30):
    """Send a request to an OAI-PMH endpoint and return the parsed XML reply.

    Parameters
    ----------
    oai_base : str
        Base URL of the OAI-PMH endpoint.
    action : str
        Query string appended verbatim to ``oai_base``
        (e.g. ``"?verb=ListMetadataFormats"``).
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without it a stalled server would block the
        caller indefinitely. Defaults to 30 seconds.

    Returns
    -------
    Element
        Root element of the parsed response, or an empty ``<OAI-PMH>`` element
        when the response body cannot be parsed.

    Notes
    -----
    Errors raised by ``requests.get`` itself are not caught here.
    """
    # NOTE(review): verify=False skips TLS certificate validation; kept as in the
    # original call - confirm it is still required for the target repositories.
    response = requests.get(oai_base + action, verify=False, timeout=timeout)
    try:
        return ET.fromstring(response.text)
    except Exception as e:
        logging.error("OAI_RQUEST: %s", e)
        # Hand back an empty document so callers can still run findall() on it.
        return ET.fromstring("<OAI-PMH></OAI-PMH>")
662+
663+
664+
def oai_metadataFormats(oai_base):
    """List the metadata formats advertised by an OAI-PMH endpoint.

    Issues a ``ListMetadataFormats`` request through ``oai_request`` and reads
    every ``metadataFormat`` element of the reply.

    Parameters
    ----------
    oai_base : str
        Base URL of the OAI-PMH endpoint.

    Returns
    -------
    dict
        Maps the text of each ``metadataPrefix`` element to the text of the
        sibling ``metadataNamespace`` element. Empty when the reply holds no
        ``metadataFormat`` element.
    """
    oai_ns = "{http://www.openarchives.org/OAI/2.0/}"
    response_root = oai_request(oai_base, "?verb=ListMetadataFormats")
    return {
        fmt.find(oai_ns + "metadataPrefix").text: fmt.find(
            oai_ns + "metadataNamespace"
        ).text
        for fmt in response_root.findall(".//" + oai_ns + "metadataFormat")
    }
677+
678+
654679
def get_rdf_metadata_format(oai_base):
655680
rdf_schemas = []
656681
try:
@@ -664,6 +689,109 @@ def get_rdf_metadata_format(oai_base):
664689
return rdf_schemas
665690

666691

692+
_OAI_ERROR_PATH = ".//{http://www.openarchives.org/OAI/2.0/}error"


def _oai_get_record_succeeds(url, timeout, log_status=False):
    """Return True when a GET on *url* yields XML without an OAI-PMH ``error`` element."""
    if not log_status:
        logging.debug("Trying: " + url)
    response = requests.get(
        url, verify=False, allow_redirects=True, timeout=timeout
    )
    if log_status:
        logging.debug(
            "Trying ID v1: url: %s | status: %i" % (url, response.status_code)
        )
    try:
        tree = ET.fromstring(response.text)
    except Exception as e:
        # A non-XML reply (e.g. an HTML error page) only rules out this
        # candidate; it must not abort the remaining attempts.
        logging.error("Unparseable OAI-PMH response from %s: %s", url, e)
        return False
    return tree.find(_OAI_ERROR_PATH) is None


def oai_check_record_url(oai_base, metadata_prefix, pid, timeout=30):
    """Find a ``GetRecord`` URL that resolves *pid* on an OAI-PMH endpoint.

    Several spellings of the OAI identifier are probed in a fixed order. Every
    candidate is requested, and the URL of the LAST one whose reply carries no
    OAI-PMH ``error`` element is returned.

    Parameters
    ----------
    oai_base : str
        Base URL of the OAI-PMH endpoint.
    metadata_prefix : str
        Value sent as ``metadataPrefix``.
    pid : str
        Identifier of the record. Its scheme is detected with ``idutils``; when
        detection fails the error is logged and the pid is used as given
        (type ``"internal"``).
    timeout : float or tuple, optional
        Passed to every ``requests.get`` call so a stalled server cannot block
        the probe forever. Defaults to 30 seconds.

    Returns
    -------
    str
        The matching URL, or ``""`` when no candidate works.

    Notes
    -----
    Errors raised by ``requests.get`` itself are not caught here.
    """
    endpoint_root = urllib.parse.urlparse(oai_base).netloc
    try:
        pid_type = idutils.detect_identifier_schemes(pid)[0]
    except Exception as e:
        pid_type = "internal"
        logging.error(e)
    if pid_type != "internal":
        oai_pid = idutils.normalize_pid(pid, pid_type)
    else:
        oai_pid = pid

    # Part of the pid after its last "." (the whole pid when it has none).
    pid_suffix = oai_pid[oai_pid.rfind(".") + 1:]

    candidate_ids = [
        "oai:%s:%s" % (endpoint_root, oai_pid),
        "%s" % (oai_pid),
        "%s:%s" % (pid_type, oai_pid),
        "oai:%s:%s" % (endpoint_root, pid_suffix),
        "oai:%s:b2rec/%s" % (endpoint_root, pid_suffix),
    ]

    action = "?verb=GetRecord"
    url_final = ""
    for position, test_id in enumerate(candidate_ids):
        params = "&metadataPrefix=%s&identifier=%s" % (metadata_prefix, test_id)
        url = oai_base + action + params
        # Only the first attempt logs the HTTP status, as the original code did.
        if _oai_get_record_succeeds(url, timeout, log_status=(position == 0)):
            # No early exit: a later matching candidate overrides an earlier one.
            url_final = url

    return url_final
782+
783+
784+
def oai_get_metadata(url, timeout=30):
    """Download *url* and parse the body as XML.

    Parameters
    ----------
    url : str
        Full URL to fetch (redirects are followed).
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without it a stalled server would block the
        caller indefinitely. Defaults to 30 seconds.

    Returns
    -------
    Element or None
        Root element of the parsed response, or ``None`` when the body cannot
        be parsed (the parse error is logged).

    Notes
    -----
    Errors raised by ``requests.get`` itself are not caught here.
    """
    logger.debug("Metadata from: %s", url)
    response = requests.get(
        url, verify=False, allow_redirects=True, timeout=timeout
    )
    try:
        return ET.fromstring(response.text)
    except Exception as e:
        logger.error("OAI_RQUEST: %s", e)
        return None
793+
794+
667795
def licenses_list():
668796
url = "https://spdx.org/licenses/licenses.json"
669797
headers = {"Accept": "application/json"} # Type of response accepted
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
[Generic]
2+
endpoint=http://digital.csic.es/dspace-oai/request
3+
[digital_csic]
4+
db_host =
5+
db_port =
6+
db_user =
7+
db_pass =
8+
db_db =
9+
oai_base = http://digital.csic.es/dspace-oai/request
10+
11+
# (meta)data terms to find the resource identifier
12+
identifier_term = [['identifier', 'doi'], ['identifier', 'uri']]
13+
identifier_term_data = [['identifier', 'doi'], ['identifier', 'uri']]
14+
15+
16+
# Metadata terms to check richness (generic). These terms should be included
17+
terms_quali_generic = [['contributor','author'],
18+
['date', 'issued'],
19+
['title', ''],
20+
['identifier', 'citation'],
21+
['publisher', ''],
22+
['identifier', 'uri'],
23+
['type', ''],
24+
['language', 'iso'],
25+
['relation', 'csic'],
26+
['rights', '']]
27+
28+
terms_quali_disciplinar = [['contributor','author'],
29+
['date', 'issued'],
30+
['title', ''],
31+
['identifier', 'citation'],
32+
['publisher', ''],
33+
['identifier', 'uri'],
34+
['type', ''],
35+
['language', 'iso'],
36+
['relation', 'csic'],
37+
['rights', '']]
38+
39+
terms_access = [['access', ''], ['rights', '']]
40+
41+
# Accepted access protocols
42+
terms_access_protocols =['http','https']
43+
44+
# Manual metadata access
45+
metadata_access_manual = ['TODO']
46+
47+
# Manual data access
48+
data_access_manual = ['TODO']
49+
50+
#Policy of metadata persistence
51+
metadata_persistence = []
52+
53+
#Authentication for EPOS
54+
metadata_authentication = []
55+
56+
#terms that use vocabularies and vocabularies used
57+
dict_vocabularies= {'ROR': 'https://ror.org/', 'PIC': 'https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/how-to-participate/participant-register', 'imtypes': 'https://www.iana.org/assignments/media-types/media-types.xhtml', 'TRL': 'TRL', 'temporal': 'https://www.iso.org/iso-8601-date-and-time-format.html', 'Rolecode': 'Rolecode', 'spdx': 'https://spdx.org/licenses/', 'ORCID': 'https://orcid.org/'}
58+
59+
terms_vocabularies=[['identifiers','relatedDataProducts'],
60+
['availableFormats',''],
61+
['temporalCoverage','relatedDataProducts'],#no temporal metadata
62+
['license',''],
63+
['contactPoints','relatedDataProducts']]
64+
65+
66+
terms_cv = [['coverage', 'spatial'], ['subject', 'lcsh'], ['subject', 'uri'], ['type', 'coar']]
67+
supported_data_formats = [".tif", ".aig", ".asc", ".agr", ".grd", ".nc", ".hdf", ".hdf5",
68+
".pdf", ".odf", ".doc", ".docx", ".csv", ".jpg", ".png", ".gif",
69+
".mp4", ".xml", ".rdf", ".txt", ".mp3", ".wav", ".zip", ".rar",
70+
".tar", ".tar.gz", ".jpeg", ".xls", ".xlsx"]
71+
72+
terms_qualified_references = [['identifier','funder']]
73+
terms_relations = [['relation', 'uri'], ['relation', ''], ['contributor','orcid'], ['contributor', 'funder']]
74+
terms_relations_only_data = [['relation', 'uri'], ['relation', ''], ['relation','isbasedon'], ['relation', 'isreferencedby']]
75+
76+
77+
terms_license = [['rights', ''], ['license', '']]
78+
79+
# Metadata terms to check reusability richness
80+
terms_reusability_richness = [['contributor','author'],
81+
['date', 'issued'],
82+
['title', ''],
83+
['identifier', 'citation'],
84+
['publisher', ''],
85+
['identifier', 'uri'],
86+
['type', ''],
87+
['language', 'iso'],
88+
['relation', 'csic'],
89+
['rights', '']]
90+
91+
#metadata standard
92+
metadata_standard = ['dc']
93+
94+
metadata_schemas = {'dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/'}
95+
96+
prov_terms = [['description', 'provenance']]

0 commit comments

Comments
 (0)