Merge branch 'main' into merge/main-upstream

orviz · orviz · commit 2e788b75b9bb · 2024-08-19T12:04:07.000+02:00
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -11,4 +11,4 @@ jobs:
     steps:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v3
-    - uses: pre-commit/action@v3.0.0
+    - uses: pre-commit/action@v3.0.1
diff --git a/api/rda.py b/api/rda.py
@@ -9,7 +9,7 @@
 from connexion import NoContent
 
 import api.utils as ut
-from api.evaluator import Evaluator
+from api import evaluator
 from fair import app_dirname, load_config
 
 logging.basicConfig(
@@ -39,10 +39,12 @@ def wrapper(body, **kwargs):
         # Get the identifiers through a search query
         ids = [item_id]
         # FIXME oai-pmh should be no different
+        downstream_logger = evaluator.logger
         if repo not in ["oai-pmh"]:
             try:
                 logger.debug("Trying to import plugin from plugins.%s.plugin" % (repo))
                 plugin = importlib.import_module("plugins.%s.plugin" % (repo), ".")
+                downstream_logger = plugin.logger
             except Exception as e:
                 logger.error(str(e))
                 return str(e), 400
@@ -55,20 +57,31 @@ def wrapper(body, **kwargs):
                     logger.error(str(e))
                     return str(e), 400
 
+        # Set handler for evaluator logs
+        evaluator_handler = ut.EvaluatorLogHandler()
+        downstream_logger.addHandler(evaluator_handler)
+
         # Collect FAIR checks per metadata identifier
         result = {}
         exit_code = 200
         for item_id in ids:
             # FIXME oai-pmh should be no different
             if repo in ["oai-pmh"]:
-                eva = Evaluator(item_id, oai_base, lang)
+                eva = evaluator.Evaluator(item_id, oai_base, lang)
             else:
                 eva = plugin.Plugin(item_id, oai_base, lang)
             _result, _exit_code = wrapped_func(body, eva=eva)
+            logger.debug(
+                "Raw result returned for indicator ID '%s': %s" % (item_id, _result)
+            )
             result[item_id] = _result
             if _exit_code != 200:
                 exit_code = _exit_code
 
+        # Append evaluator logs to the final results
+        result["evaluator_logs"] = evaluator_handler.logs
+        logger.debug("Evaluator logs appended through 'evaluator_logs' property")
+
         return result, exit_code
 
     return wrapper
@@ -106,7 +119,7 @@ def endpoints(plugin=None, plugins_path="plugins"):
         try:
             return enp[plugin]
         except:
-            return "Input plugin not found"
+            return (enp, 404)
     return enp
 
 
@@ -1537,30 +1550,6 @@ def rda_all(body, eva):
     return result, 200
 
 
-def endpoints(plugin=None):
-    plugins_list = ["epos", "gbif", "digital_csic", "dspace7", "signposting"]
-    plugins_with_endpoint = []
-    links = []
-
-    for plug in plugins_list:
-        try:
-            config = configparser.ConfigParser()
-            config.read("plugins/" + plug + "/config.ini")
-            links.append(config["Generic"]["endpoint"])
-            plugins_with_endpoint.append(plug)
-        except:
-            logging.debug("No endpoint found for " + plug)
-    # Create a dict with all the found endpoints
-    enp = dict(zip(plugins_with_endpoint, links))
-    # If the plugin is given then only returns a message
-    if plugin:
-        try:
-            return (enp[plugin], 200)
-        except:
-            return (plugins_with_endpoint, 404)
-    return (enp, 200)
-
-
 def delete(id_):
     id_ = int(id_)
     return NoContent, 204
diff --git a/api/utils.py b/api/utils.py
@@ -15,6 +15,31 @@
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 
 
+class EvaluatorLogHandler(logging.Handler):
+    """In-memory logging handler that collects log messages in a list.
+
+    Each emitted record is stored in ``self.logs`` as ``"[LEVELNAME] message"``
+    so that the collected list can be read back once the logging calls are done.
+    """
+
+    def __init__(self, level=logging.DEBUG):
+        # The base initialiser creates the lock, filter list and formatter
+        # slot that the inherited Handler methods rely on.
+        super().__init__(level)
+        self.logs = []
+
+    def emit(self, record):
+        """Append the formatted record to ``self.logs``."""
+        try:
+            # getMessage() merges lazy ``%``-style args into the message.
+            message = record.getMessage()
+        except Exception:
+            # Same policy as the stdlib handlers: report and keep running.
+            self.handleError(record)
+            return
+        self.logs.append("[%s] %s" % (record.levelname, message))
+
+
 def get_doi_str(doi_str):
     doi_to_check = re.findall(
         r"10[\.-]+.[\d\.-]+/[\w\.-]+[\w\.-]+/[\w\.-]+[\w\.-]", doi_str
@@ -834,8 +859,10 @@ def resolve_handle(handle_id):
 
     Returns:
     """
-    resolves = False
-    endpoint = urljoin("https://hdl.handle.net/api/", "handles/%s" % handle_id)
+    handle_id_normalized = idutils.normalize_doi(handle_id)
+    endpoint = urljoin(
+        "https://hdl.handle.net/api/", "handles/%s" % handle_id_normalized
+    )
     headers = {"Content-Type": "application/json"}
     r = requests.get(endpoint, verify=False, headers=headers)
     if not r.ok:
@@ -844,9 +871,10 @@ def resolve_handle(handle_id):
             r.status_code,
         )
         raise Exception(msg)
-
     json_data = r.json()
     response_code = json_data.get("responseCode", -1)
+
+    resolves = False
     if response_code == 1:
         resolves = True
         msg = "Handle and associated values found (HTTP 200 OK)"
diff --git a/docs/How  to use the epos plugin.md b/docs/How  to use the epos plugin.md
@@ -56,10 +56,19 @@ To make sure its the one you are looking for you can make a curl to the API with
 Now take a look at terminal 1, it will display a table with important findability-related terms, one of them is the title, so you can make sure the item is the one that you want,
 (If the table displays a lot of ... items try to make the window wider and retry the test)
 
+#### 2. Use the `--search` optional argument from the fair-eva.py script
 
-#### 2. Connecting directly to the EPOS API
+A simple way to get the UUID is to use the `--search` option to connect to the EPOS API. In terminal 2, just run the command:
 
-You can perform a curl to the EPOS API to get your UUID. Yhe process is the same as before
+```
+(terminal #2) python3 scripts/fair-eva.py --search SVO --plugin epos  -j
+```
+
+Then you will select an index and the evaluation will be performed directly.
+
+#### 3. Connecting directly to the EPOS API
+
+You can perform a curl to the EPOS API to get your UUID. The process is the same as before
 ```
 curl -X 'GET' \
   'https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1/resources/search?q=SVO' \
@@ -124,18 +133,13 @@ python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin e
 This command will return the evaluation of the RDA-F1-01M indicator.
 
 ### Scores
-To get a clear view of the scores the CLI has 2 extra parameters that print the punctuation of the item in the distict catergories.
+To get a clear view of the scores, the CLI has an extra parameter that prints the score of the item in the distinct categories.
 
-You can add -s to get the points in each of the FAIR catergories and the total score.
-```
-(terminal #2) python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin epos --repository https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1 -s
-```
-Or you can add -fs to get the points in each of the different checks
- the total score.
+You can add --totals to get the points in each of the FAIR categories and the total score.
 ```
-(terminal #2) python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin epos --repository https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1 -fs
+(terminal #2) python3 scripts/fair-eva.py --id d4101e2f-c1b9-4fde-a4d1-d79a26d5d23a --plugin epos --repository https://ics-c.epos-ip.org/development/k8s-epos-deploy/dt-geo/api/v1 --totals
 ```
-You can also use them both together. Note that the points are not the basic average of the tests, because each test has a different weight.
+Note that the points are the weighted average of the tests, because each test has a different weight.
 
 ### Configuration through config.ini.
 There are some tests whose results depend on things outside of the metadata given by the EPOS API so their result depends on a configuration parameter. These parameters are stored in the file 'config.ini' you can change these parameters to change some results. WARNING a lot of parameters are essential for the tool to work. If the parameter you are interested in changing doesn't appear on the following list you probably shouldn't change it:
diff --git a/plugins/epos/config.ini b/plugins/epos/config.ini
@@ -151,7 +151,7 @@ metadata_authentication = []
 #terms that use vocabularies and vocabularies used
 dict_vocabularies= {'ROR': 'https://ror.org/', 'PIC': 'https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/how-to-participate/participant-register', 'imtypes': 'https://www.iana.org/assignments/media-types/media-types.xhtml', 'TRL': 'TRL', 'temporal': 'https://www.iso.org/iso-8601-date-and-time-format.html', 'Rolecode': 'Rolecode', 'spdx': 'https://spdx.org/licenses/', 'ORCID': 'https://orcid.org/'}
 
-terms_vocabularies=[['dataProvider','relatedDataProducts'],
+terms_vocabularies=[['identifiers','relatedDataProducts'],
                    ['',''],
                    ['availableFormats',''],
                    ['',''],
diff --git a/plugins/epos/plugin.py b/plugins/epos/plugin.py
@@ -60,7 +60,9 @@ def __init__(self, item_id, oai_base=None, lang="en", config=None):
             metadata_sample,
             columns=["metadata_schema", "element", "text_value", "qualifier"],
         )
-        logger.debug("METADATA: %s" % (self.metadata))
+        logger.debug(
+            "Obtained metadata from repository: %s" % (self.metadata.to_json())
+        )
         # Protocol for (meta)data accessing
         if len(self.metadata) > 0:
             self.access_protocols = ["http"]
@@ -145,7 +147,9 @@ def get_metadata(self):
         metadata_sample = []
         eml_schema = "epos"
 
-        final_url = self.oai_base + "/resources/details/" + self.item_id
+        final_url = (
+            self.oai_base + "/resources/details/" + self.item_id + "?extended=true"
+        )
 
         error_in_metadata = False
         headers = {
@@ -1076,7 +1080,8 @@ def rda_a1_2_01d(self):
             )
         return points, msg
 
-    def rda_a2_01m(self):
+    @ConfigTerms(term_id="terms_access")
+    def rda_a2_01m(self, return_protocol=False, **kwargs):
         """Indicator RDA-A2-01M A2: Metadata should be  accessible even when the data is no longer available.
         The indicator intends to verify that information about a digital object is still available after
         the object has been deleted or otherwise has been lost. If possible, the metadata that
@@ -1094,10 +1099,39 @@ def rda_a2_01m(self):
         msg = _(
             "Preservation policy depends on the authority where this Digital Object is stored"
         )
+
         if self.metadata_persistence:
             if ut.check_link(self.metadata_persistence[0]):
                 points = 100
                 msg = "The preservation policy is: " + str(self.metadata_persistence[0])
+            return (points, [{"message": msg, "points": points}])
+
+        terms_access = kwargs["terms_access"]
+        terms_access_list = terms_access["list"]
+        terms_access_metadata = terms_access["metadata"]
+
+        _elements = [
+            "downloadURL",
+        ]
+
+        url = terms_access_metadata.loc[
+            terms_access_metadata["element"] == "downloadURL", "text_value"
+        ]
+
+        if len(url.values) == 0:
+            return (
+                points,
+                [
+                    {
+                        "message": "Could not check data access protocol or persistence policy: EPOS metadata element <downloadURL> not found",
+                        "points": points,
+                    }
+                ],
+            )
+        else:
+            if not ut.check_link(url.values[0]):
+                points = 100
+                msg = "Metadata is available after the data is no longer available."
 
         return (points, [{"message": msg, "points": points}])
 
@@ -1152,7 +1186,8 @@ def rda_i1_01m(self, **kwargs):
         info = dict(zip(self.vocabularies, vocabularies_element_list))
         for vocab in info.keys():
             if vocab == "ROR":
-                for iden in info[vocab][0][0]["identifiers"]:
+                for iden in info[vocab][0]:
+                    # return(0,'testing')
                     if iden["type"] == "ROR":
                         exists, name = ut.check_ror(iden["value"])
                         if exists:
@@ -1291,6 +1326,13 @@ def rda_i1_02m(self, **kwargs):
                 "The metadata standard in use provides a machine-understandable knowledge expression: %s"
                 % self.metadata_standard
             )
+            logger.info(msg)
+        else:
+            msg = (
+                "The metadata standard in use does not provide a machine-understandable knowledge expression: %s"
+                % self.metadata_standard
+            )
+            logger.warning(msg)
 
         return (points, [{"message": msg, "points": points}])
 
@@ -1675,8 +1717,6 @@ def rda_r1_1_03m(self, **kwargs):
         msg
             Message with the results or recommendations to improve this indicator
         """
-        msg_list = []
-
         terms_license = kwargs["terms_license"]
         terms_license_metadata = terms_license["metadata"]
 
@@ -1691,13 +1731,12 @@ def rda_r1_1_03m(self, **kwargs):
         if _points_license == 100:
             _msg = "License/s are machine readable according to SPDX"
         elif _points_license == 0:
-            _msg = "License/s arenot machine readable according to SPDX"
+            _msg = "License/s are not machine readable according to SPDX"
         else:
             _msg = "A subset of the license/s are machine readable according to SPDX"
         logger.info(_msg)
-        msg_list.append({"message": _msg, "points": _points_license})
 
-        return (_points_license, [{"message": msg_list, "points": _points_license}])
+        return (_points_license, [{"message": _msg, "points": _points_license}])
 
     @ConfigTerms(term_id="terms_provenance")
     def rda_r1_2_01m(self, **kwargs):
@@ -1901,11 +1940,13 @@ def rda_r1_3_02d(self, **kwargs):
            - 100/100 if the data format is machine understandable
            - 0/100 otherwise
         """
-        msg = "No data standard found"
+        msg = ""
         points = 0
 
-        points, msg = self.rda_r1_3_01d()
+        points, _msg = self.rda_r1_3_01d()
         if points == 100:
             msg = "Your data standard is expressed in compliance with a  machine-understandable community standard"
+        else:
+            msg = "No data standard found"
 
         return (points, [{"message": msg, "points": points}])
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,7 +16,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "werkzeug == 2.3.8",
+    "werkzeug == 3.0.3",
     "connexion",
     "swagger-ui-bundle == 0.0.6",
    "flask == 2.2.5",
diff --git a/requirements.txt b/requirements.txt
@@ -4,7 +4,7 @@
 # connexion requires werkzeug but connexion < 2.4.0 does not install werkzeug
 # we must peg werkzeug versions below to fix connexion
 # https://github.com/zalando/connexion/pull/1044
-werkzeug == 2.3.8
+werkzeug == 3.0.3
 connexion[flask,uvicorn]
 #swagger-ui-bundle == 0.0.6
 swagger-ui-bundle == 0.0.6
@@ -22,3 +22,4 @@ PyPDF2
 rdflib
 dicttoxml
 prettytable
+pyarrow
diff --git a/scripts/fair-eva.py b/scripts/fair-eva.py
diff --git a/utils/pdf_gen.py b/utils/pdf_gen.py

-Original file line number
+Diff line change
     steps:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v3
 -    - uses: pre-commit/action@v3.0.0
 +    - uses: pre-commit/action@v3.0.1
Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,7 @@ classifiers = [`
`16`	`16`	`]`
`17`	`17`
`18`	`18`	`dependencies = [`
`19`		`- "werkzeug == 2.3.8",`
	`19`	`+ "werkzeug == 3.0.3",`
`20`	`20`	`"connexion",`
`21`	`21`	`"swagger-ui-bundle == 0.0.6",`
`22`	`22`	`"flask == 2.2.5",`