Skip to content

Commit 032cbdc

Browse files
committed
feat: Copy MultilangProfile from ckanext-switzerland-ng
1 parent 94b0bac commit 032cbdc

File tree

2 files changed

+95
-15
lines changed

2 files changed

+95
-15
lines changed

ckanext/switzerland/dcat/profiles.py

Lines changed: 65 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,20 @@
33
import time
44
from datetime import datetime
55

6+
import ckan.plugins.toolkit as tk
67
import rdflib
78
from ckan.lib.helpers import url_for
89
from rdflib import BNode, Literal, URIRef
910
from rdflib.namespace import RDF, RDFS, SKOS, Namespace
1011

11-
from ckanext.dcat.profiles import RDFProfile
12+
from ckanext.dcat.profiles import CleanedURIRef, RDFProfile, SchemaOrgProfile
1213
from ckanext.dcat.utils import resource_uri
1314
from ckanext.switzerland.helpers import (
1415
get_langs,
1516
map_to_valid_format,
1617
ogdch_get_default_terms_of_use,
18+
get_publisher_dict_from_dataset,
19+
uri_to_iri,
1720
)
1821

1922
log = logging.getLogger(__name__)
@@ -538,6 +541,55 @@ def graph_from_catalog(self, catalog_dict, catalog_ref):
538541
g.add((catalog_ref, RDF.type, DCAT.Catalog))
539542

540543

544+
class MultiLangProfile(RDFProfile):
    """RDF profile helpers for adding language-tagged literals to the graph.

    A multilingual value is a mapping whose keys are used as the language tag
    of the resulting literals and whose values are either a single value or a
    list of values (e.g. keywords).
    """

    def _add_multilang_value(
        self, subject, predicate, key=None, data_dict=None, multilang_values=None
    ):
        """
        Adds one literal per language (and per value) to the graph

        If `multilang_values` is empty and both `data_dict` and `key` are
        given, the value is looked up with `data_dict.get(key)`.

        A value without an `items()` method is added as a single literal
        without a language tag. Empty values are skipped.
        """
        if not multilang_values and data_dict and key:
            multilang_values = data_dict.get(key)
        if not multilang_values:
            return

        # Only the `.items()` lookup is guarded: an AttributeError raised
        # while adding triples must not be mistaken for "not a multilang dict".
        try:
            translations = multilang_values.items()
        except AttributeError:
            # not a mapping: add it as a non-translated Literal
            self.g.add((subject, predicate, Literal(multilang_values)))
            return

        for lang, values in translations:
            if not values:
                continue
            # the values can be either a single value or they are nested in
            # a list (e.g. keywords)
            if not isinstance(values, list):
                values = [values]
            for value in values:
                if value:
                    self.g.add((subject, predicate, Literal(value, lang=lang)))

    def _add_multilang_triples_from_dict(self, _dict, subject, items):
        """
        Adds multilang triples for every entry of `items`

        Each item is a 4-tuple `(key, predicate, fallbacks, _type)`. The
        `_type` element is not used, as multilang objects are always literals.
        """
        for key, predicate, fallbacks, _type in items:
            self._add_multilang_triple_from_dict(
                _dict, subject, predicate, key, fallbacks=fallbacks
            )

    def _add_multilang_triple_from_dict(
        self, _dict, subject, predicate, key, fallbacks=None
    ):
        """
        Adds a new multilang triple to the graph with the provided parameters

        The subject and predicate of the triple are passed as the relevant
        RDFLib objects (URIRef or BNode). The object is always a literal value,
        which is extracted from the dict using the provided key (see
        `_get_dict_value`). If no value is found for `key`, the keys in
        `fallbacks` are tried in order and the first non-empty value is used.
        Nothing is added if no value is found at all.
        """
        value = self._get_dict_value(_dict, key)

        if not value and fallbacks:
            for fallback in fallbacks:
                value = self._get_dict_value(_dict, fallback)
                if value:
                    break

        if value:
            self._add_multilang_value(subject, predicate, multilang_values=value)
590+
591+
592+
541593
class SwissSchemaOrgProfile(SchemaOrgProfile, MultiLangProfile):
542594
def _basic_fields_graph(self, dataset_ref, dataset_dict):
543595
items = [
@@ -558,13 +610,13 @@ def _basic_fields_graph(self, dataset_ref, dataset_dict):
558610

559611
def _publisher_graph(self, dataset_ref, dataset_dict):
560612
if any(
561-
[
562-
self._get_dataset_value(dataset_dict, "publisher_uri"),
563-
self._get_dataset_value(dataset_dict, "publisher_name"),
564-
dataset_dict.get("organization"),
565-
]
613+
[
614+
self._get_dataset_value(dataset_dict, "publisher_uri"),
615+
self._get_dataset_value(dataset_dict, "publisher_name"),
616+
dataset_dict.get("organization"),
617+
]
566618
):
567-
publisher_uri, publisher_name = dh.get_publisher_dict_from_dataset(
619+
publisher_uri, publisher_name = get_publisher_dict_from_dataset(
568620
dataset_dict.get("publisher")
569621
)
570622
if publisher_uri:
@@ -591,7 +643,7 @@ def _publisher_graph(self, dataset_ref, dataset_dict):
591643

592644
publisher_url = self._get_dataset_value(dataset_dict, "publisher_url")
593645
if not publisher_url and dataset_dict.get("organization"):
594-
publisher_url = dataset_dict["organization"].get("url") or config.get(
646+
publisher_url = dataset_dict["organization"].get("url") or tk.config.get(
595647
"ckan.site_url", ""
596648
)
597649

@@ -667,7 +719,8 @@ def contact_details(self, dataset_dict, dataset_ref, g):
667719
if not contact_point.get("email") or not contact_point.get("name"):
668720
continue
669721
contact_details = BNode()
670-
contact_point_email = EMAIL_MAILTO_PREFIX + contact_point["email"]
722+
723+
contact_point_email = f"mailto:{contact_point['email']}"
671724
contact_point_name = contact_point["name"]
672725

673726
g.add((contact_details, RDF.type, VCARD.Organization))
@@ -683,7 +736,7 @@ def download_access_url(self, resource_dict, distribution, g):
683736
download_url = resource_dict.get("download_url")
684737
if download_url:
685738
try:
686-
download_url = dh.uri_to_iri(download_url)
739+
download_url = uri_to_iri(download_url)
687740
g.add((distribution, SCHEMA.downloadURL, URIRef(download_url)))
688741
except ValueError:
689742
# only add valid URL
@@ -692,7 +745,7 @@ def download_access_url(self, resource_dict, distribution, g):
692745
url = resource_dict.get("url")
693746
if (url and not download_url) or (url and url != download_url):
694747
try:
695-
url = dh.uri_to_iri(url)
748+
url = uri_to_iri(url)
696749
g.add((distribution, SCHEMA.accessURL, URIRef(url)))
697750
except ValueError:
698751
# only add valid URL
@@ -703,16 +756,14 @@ def download_access_url(self, resource_dict, distribution, g):
703756
return g
704757

705758
def graph_from_dataset(self, dataset_dict, dataset_ref):
706-
dataset_uri = dh.dataset_uri(dataset_dict, dataset_ref)
707-
dataset_ref = URIRef(dataset_uri)
708759
g = self.g
709760

710761
# Contact details
711762
self.contact_details(dataset_dict, dataset_ref, g)
712763

713764
# Resources
714765
for resource_dict in dataset_dict.get("resources", []):
715-
distribution = URIRef(dh.resource_uri(resource_dict))
766+
distribution = URIRef(resource_uri(resource_dict))
716767

717768
g.add((dataset_ref, SCHEMA.distribution, distribution))
718769
g.add((distribution, RDF.type, SCHEMA.Distribution))
@@ -767,4 +818,3 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
767818

768819
def parse_dataset(self, dataset_dict, dataset_ref):
769820
super(SwissSchemaOrgProfile, self).parse_dataset(dataset_dict, dataset_ref)
770-

ckanext/switzerland/helpers.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
import unicodedata
66
from collections import OrderedDict, defaultdict
77
from datetime import datetime
8+
from urllib.parse import urlparse
89
from zoneinfo import ZoneInfo
910

1011
import ckan.plugins.toolkit as tk
12+
import iribaker
1113
import requests
1214
from ckan.common import _
1315
from ckan.lib.helpers import _link_to
@@ -676,3 +678,31 @@ def ogdch_get_default_terms_of_use():
676678
"name": _("Terms of use opentransportdata.swiss"),
677679
"url": f"https://opentransportdata.swiss/{ _('en/terms-of-use') }",
678680
}
681+
682+
683+
def get_publisher_dict_from_dataset(publisher):
    """
    Extract the publisher's url and name from a dataset's publisher field.

    `publisher` may be a dict or a JSON string encoding a dict. A missing
    publisher, a string that is not valid JSON, or a value that does not
    decode to a dict is treated as "no publisher".

    :returns: tuple `(url, name)`; each element is None when not available
    """
    if not publisher:
        return None, None
    if not isinstance(publisher, dict):
        try:
            publisher = json.loads(publisher)
        except (TypeError, ValueError):
            # not a JSON document (json.JSONDecodeError is a ValueError)
            return None, None
        if not isinstance(publisher, dict):
            # valid JSON, but not an object with "url"/"name" keys
            return None, None
    return publisher.get("url"), publisher.get("name")
689+
690+
691+
def uri_to_iri(uri):
    """
    convert URI to IRI (used for RDF)
    this function also validates the URI and throws a ValueError if the
    provided URI is invalid

    :param uri: the URI to convert
    :returns: the result of `iribaker.to_iri(uri)`
    :raises ValueError: if the URI is empty, is not a string, has no scheme
        or netloc (or the netloc is "-"), or cannot be converted
    """
    if not uri:
        raise ValueError("Provided URI is empty or None")
    if not isinstance(uri, (str, bytes)):
        # urlparse fails with an AttributeError on such input; raise the
        # documented ValueError instead
        raise ValueError("Provided URI is not a string")

    result = urlparse(uri)
    if not result.scheme or not result.netloc or result.netloc == "-":
        raise ValueError("Provided URI does not have a valid schema or netloc")

    try:
        return iribaker.to_iri(uri)
    except Exception as e:
        # keep the original error as the explicit cause for debugging
        raise ValueError(f"Provided URI can't be converted to IRI: {e}") from e

0 commit comments

Comments
 (0)