Skip to content

Commit ef0ae8f

Browse files
authored
Merge pull request #252 from IFCA-Advanced-Computing/dev/gbif
Improving GBIF
2 parents f3e11db + aae2fd3 commit ef0ae8f

12 files changed

+486
-291
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ repos:
55
- id: trailing-whitespace
66
- id: end-of-file-fixer
77
- id: check-yaml
8-
- id: check-added-large-files
8+
# - id: check-added-large-files
99
- repo: https://github.com/psf/black-pre-commit-mirror
1010
rev: 24.3.0
1111
hooks:

api/evaluator.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,14 +1794,16 @@ def rda_r1_3_01d(self, **kwargs):
17941794
terms_reusability_richness_list = terms_reusability_richness["list"]
17951795
terms_reusability_richness_metadata = terms_reusability_richness["metadata"]
17961796

1797-
element = terms_reusability_richness_metadata.loc[
1798-
terms_reusability_richness_metadata["element"].isin(["availableFormats"]),
1799-
"text_value",
1800-
].values[0]
1801-
for form in element:
1802-
availableFormats.append(form["label"])
1803-
18041797
try:
1798+
element = terms_reusability_richness_metadata.loc[
1799+
terms_reusability_richness_metadata["element"].isin(
1800+
["availableFormats"]
1801+
),
1802+
"text_value",
1803+
].values[0]
1804+
for form in element:
1805+
availableFormats.append(form["label"])
1806+
18051807
f = open(path)
18061808
f.close()
18071809

api/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,10 +577,10 @@ def orcid_basic_info(orcid):
577577
item = xmlTree.findall(
578578
".//{http://www.orcid.org/ns/common}assertion-origin-name"
579579
)
580+
basic_info = "ORCID Name: %s" % item[0].text
580581
except Exception as e:
581582
logging.error(e)
582583
return basic_info
583-
basic_info = "ORCID Name: %s" % item[0].text
584584
return basic_info
585585

586586

plugins/gbif/config.ini

Lines changed: 123 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,99 @@
11
[Generic]
22
doi_url = https://doi.org/
3+
# Relative path to the API config file
34
api_config = fair-api.yaml
4-
endpoint= https://api.gbif.org/v1
5+
endpoint=https://api.gbif.org/v1/
6+
57
[local]
68
only_local = false
79
repo = digital_csic
10+
logo_url = 'https://ifca.unican.es'
11+
title = FAIR EVA: Evaluator, Validator & Advisor
812

913
[Repositories]
1014
#Name in plugin, name in tag
1115
oai-pmh = 'Evaluator'
1216
digital_csic = 'Digital.CSIC'
1317
dspace7 = 'DSpace7'
18+
epos= 'epos'
1419
example_plugin = Example_Plugin
15-
gbif = 'Plugin'
20+
signposting = Signposting
21+
gbif = 'gbif'
22+
23+
[dublin-core]
24+
# Aligned with Dublin Core Metadata for Resource Discovery (properties in the /elements/1.1/ namespace)
25+
# https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-3
26+
terms_findability_richness = ['Title',
27+
'Subject',
28+
'Description',
29+
'Type',
30+
'Source',
31+
'Relation',
32+
'Coverage',
33+
'Creator',
34+
'Publisher',
35+
'Contributor',
36+
'Rights',
37+
'Date',
38+
'Format',
39+
'Identifier',
40+
'Language']
1641

1742
[gbif]
18-
# Metadata terms to find the resource identifier
19-
identifier_term = [['alternateIdentifier','']]
20-
21-
# Metadata terms to find the data identifier
22-
identifier_term_data = [['alternateIdentifier','']]
43+
# (meta)data terms to find the resource identifier
44+
identifier_term = [['dataset','alternateIdentifier']]
45+
identifier_term_data = [['dataset','alternateIdentifier']]
2346

2447
# Metadata terms to check richness (generic). These terms should be included [term, qualifier]. None means no qualifier
25-
terms_quali_generic = [['contributor',None],
26-
['date', None],
27-
['description', None],
28-
['identifier', None],
29-
['publisher', None],
30-
['rights', None],
31-
['title', None],
32-
['subject', None]]
48+
terms_quali_generic = [['dataset.creator', 'givenName'],
49+
['dataset.creator', 'surName'],
50+
['dataset', 'pubDate'],
51+
['dataset.abstract', 'para'],
52+
['dataset.intellectualRights.para.ulink', 'citetitle'],
53+
['dataset', 'title'],
54+
['dataset.keywordSet', 'keyword']]
3355

3456
# Metadata terms to check richness (disciplinar). These terms should be included [term, qualifier]
35-
terms_quali_disciplinar = [['contributor', None],
36-
['date', None],
37-
['description', None],
38-
['identifier', None],
39-
['publisher', None],
40-
['rights', None],
41-
['title', None],
42-
['subject', None]]
43-
44-
# Metadata terms that defines accessibility
45-
terms_access = [['access', ''], ['rights', '']]
57+
terms_quali_disciplinar = [['dataset.coverage.geographicCoverage', 'geographicDescription'],
58+
['dataset.coverage.temporalCoverage.rangeOfDates.beginDate', 'calendarDate'],
59+
['dataset.coverage.temporalCoverage.rangeOfDates.endDate', 'calendarDate'],
60+
['dataset.coverage.taxonomicCoverage.taxonomicClassification', 'taxonRankValue']]
61+
62+
# Metadata terms that define accessibility (case sensitive)
63+
terms_access = [['dataset.intellectualRights.para.ulink', 'citetitle']]
64+
65+
# Metadata terms to check discoverability richness.
66+
#
67+
# Dublin Core element DT-GEO element EPOS element
68+
# ------------------- -------------- ------------
69+
# Title Name title
70+
# Subject Keywords keywords
71+
# Description Description description
72+
# Type Type type
73+
# Source Related DA (relationship) NA
74+
# Relation Related DA NA
75+
# Coverage Spatial relevance, Temporal relevance spatial, temporalCoverage
76+
# Creator Organisation/Person role NA
77+
# Publisher Organisation (name) serviceProvider
78+
# Contributor Organisation/Person role NA
79+
# Rights Licensing constraints license
80+
# Date Temporal relevance temporalCoverage
81+
# Format File format availableFormats
82+
# Identifier Data Unique ID DOI
83+
# Language NA NA
84+
terms_findability_richness = [['dataset', 'title'],
85+
['dataset.keywordSet', 'keyword'],
86+
['dataset.abstract', 'para'],
87+
['dataset.coverage.geographicCoverage', 'geographicDescription'],
88+
['dataset.coverage.temporalCoverage.rangeOfDates.beginDate', 'calendarDate'],
89+
['dataset.coverage.temporalCoverage.rangeOfDates.endDate', 'calendarDate'],
90+
['dataset.intellectualRights.para.ulink', 'citetitle'],
91+
['dataset','alternateIdentifier']]
92+
93+
# Metadata terms to check reusability richness
94+
terms_reusability_richness = [['dataset','alternateIdentifier'],
95+
['additionalMetadata.metadata.gbif', 'hierarchyLevel']]
4696

47-
# Accepted access protocols
48-
terms_access_protocols =['http','https','ftp']
4997

5098
# Manual metadata access
5199
metadata_access_manual = ['TODO']
@@ -72,47 +120,76 @@ terms_vocabularies=[['identifiers','relatedDataProducts'],
72120
['contactPoints','relatedDataProducts']]
73121

74122
# Metadata terms which include controlled vocabularies. More controlled vocabularies can be implemented in plugins
75-
terms_cv = [['coverage', 'spatial'], ['subject', 'lcsh']]
123+
terms_cv = [['dataset.creator', 'userId']]
76124

77125
# List of data formats that are standard for the community
78126
supported_data_formats = [".txt", ".pdf", ".csv", ".nc", ".doc", ".xls", ".zip", ".rar", ".tar", ".png", ".jpg"]
79127

80128
# Metadata terms that define links or relations with authors and contributors (preferably in ORCID format)
81-
terms_qualified_references = [['contributor', None]]
129+
terms_qualified_references = [['dataset.creator', 'userId'],
130+
['dataset.contact', 'userId'],
131+
['dataset.project.personnel', 'userId'],
132+
['dataset.metadataProvider', 'userId' ]]
82133

83134
# Metadata terms that define links or relations with other resources (preferably in ORCID format, URIs or persistent identifiers)
84-
terms_relations = [['relation', None]]
85-
86-
# Metadata terms to check reusability richness
87-
terms_reusability_richness = [['rigths',''],
88-
['license','']]
135+
terms_relations = [['dataset.creator', 'userId']]
89136

90137
# Metadata terms that define the license type
91-
terms_license = [['rights', '']]
138+
terms_license = [['dataset.intellectualRights.para.ulink', 'citetitle']]
139+
140+
# Metadata terms that define metadata about provenance
141+
terms_provenance =[['curationAndProvenanceObligations','']]
92142

93-
metadata_schemas = [{'eml': 'eml://ecoinformatics.org/eml-2.1.1'}]
143+
# Accepted access protocols
144+
terms_access_protocols =['http','https','ftp']
145+
146+
# Manual metadata access
147+
metadata_access_manual = ['https://techdocs.gbif.org/en/openapi/']
148+
149+
# Manual data access
150+
data_access_manual = ['https://techdocs.gbif.org/en/openapi/']
151+
152+
# Data model information
153+
terms_data_model = []
94154

95155
#metadata standard
96156
metadata_standard = ['XML']
97157

98-
# Api auth
99-
100-
api_user = mag848
101-
api_pass = stcDPwfQfrnwiQsHNMPRKV7RY
158+
159+
#Policy of metadata persistence
160+
metadata_persistence = []
161+
162+
#Authentication for EPOS
163+
metadata_authentication = []
164+
165+
#terms that use vocabularies and vocabularies used
166+
dict_vocabularies= {'ORCID': 'https://orcid.org/'}
167+
168+
terms_vocabularies=[['identifiers','relatedDataProducts'],
169+
['',''],
170+
['availableFormats',''],
171+
['',''],
172+
['temporalCoverage','relatedDataProducts'],#no temporal metatdata
173+
['',''],
174+
['license',''],
175+
['contactPoints','relatedDataProducts']]
176+
177+
api_mail =
178+
api_user =
179+
api_pass =
180+
102181

103182
[fairsharing]
104183
# username and password
105184
username = ['']
106185

107186
password = ['']
108-
#Path is the folder path ehere the netadata or fomats is stored
109-
#Or if the username or password is given is what you are looking in
110-
metadata_path = ['static/fairsharing_metadata_standards140224.json']
111187

112-
formats_path = ['static/fairsharing_formats260224.txt']
188+
#_path is the variable that stores the path to the file in which the fairsharing-approved metadata standards or formats are stored
113189

190+
metadata_path = ['static/fairsharing_metadata_standards20240214.json']
114191

115-
fairsharing_formats_path = ['static/fairsharing_formats150224.json']
192+
formats_path = ['static/fairsharing_formats20240226.txt']
116193

117194

118195

0 commit comments

Comments
 (0)