Skip to content

Commit 53a4fcc

Browse files
Merge branch 'develop'
2 parents b04627c + f298ac2 commit 53a4fcc

File tree

13 files changed

+160
-43
lines changed

13 files changed

+160
-43
lines changed

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ def read(file_path):
88
setup(
99
name = 'shexer',
1010
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
11-
version = '2.6.5',
11+
version = '2.6.5.1',
1212
description = 'Automatic schema extraction for RDF graphs',
1313
author = 'Daniel Fernandez-Alvarez',
1414
author_email = 'danifdezalvarez@gmail.com',
1515
url = 'https://github.com/DaniFdezAlvarez/shexer',
16-
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.5.tar.gz',
16+
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.5.1.tar.gz',
1717
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
1818
long_description = read('README.md'),
1919
long_description_content_type='text/markdown',

shexer/consts.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,7 @@
4848
"http://www.w3.org/2000/01/rdf-schema#": "rdfs",
4949
"http://www.w3.org/2001/XMLSchema#": "xsd",
5050
"http://xmlns.com/foaf/0.1/": "foaf"
51-
}
51+
}
52+
53+
# WESO-SHAPES-ONTO
54+
FREQ_PROP = "http://weso.es/shexer/ontology/ratio_property_usage"

shexer/core/shexing/class_shexer.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22

3-
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE
3+
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, FREQ_PROP
44
from shexer.core.shexing.strategy.direct_shexing_strategy import DirectShexingStrategy
55
from shexer.core.shexing.strategy.direct_and_inverse_shexing_strategy import DirectAndInverseShexingStrategy
66
from shexer.utils.target_elements import determine_original_target_nodes_if_needed
@@ -18,7 +18,8 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
1818
allow_opt_cardinality=True, disable_exact_cardinality=False,
1919
shapes_namespace=SHAPES_DEFAULT_NAMESPACE, inverse_paths=False,
2020
decimals=-1, instances_report_mode=RATIO_INSTANCES, detect_minimal_iri=False,
21-
class_min_iris_dict=None, allow_redundant_or=False, shape_names_dict=None):
21+
class_min_iris_dict=None, allow_redundant_or=False, shape_names_dict=None, frequency_property=FREQ_PROP,
22+
comments_to_annotations=False):
2223
self._class_counts_dict = class_counts_dict
2324
self._class_profile_dict = class_profile_dict if class_profile_dict is not None else self._load_class_profile_dict_from_file(
2425
class_profile_json_file)
@@ -42,6 +43,8 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
4243
self._detect_minimal_iri = detect_minimal_iri
4344
self._allow_redundant_or = allow_redundant_or
4445
self._shape_names_dict = shape_names_dict if shape_names_dict is not None else {}
46+
self._frequency_property = frequency_property
47+
self._comments_to_annotations = comments_to_annotations
4548

4649
self._original_target_nodes = determine_original_target_nodes_if_needed(remove_empty_shapes=remove_empty_shapes,
4750
original_target_classes=original_target_classes,

shexer/core/shexing/class_shexer_fed_sources.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from shexer.core.shexing.class_shexer import ClassShexer
2-
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE
3-
from shexer.consts import RATIO_INSTANCES
2+
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, FREQ_PROP, RATIO_INSTANCES
43
from shexer.core.instances.pconsts import FEDERATION_TAG_MARK
54

65
_COMMENT_FED_PROPERTY = "# Constraint only observed in {}"
@@ -15,13 +14,15 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
1514
disable_comments=False, namespaces_dict=None, tolerance_to_keep_similar_rules=0,
1615
allow_opt_cardinality=True, disable_exact_cardinality=False, shapes_namespace=SHAPES_DEFAULT_NAMESPACE,
1716
inverse_paths=False, decimals=-1, instances_report_mode=RATIO_INSTANCES, detect_minimal_iri=False,
18-
class_min_iris_dict=None, allow_redundant_or=False, fed_sources=None, shape_names_dict=None):
17+
class_min_iris_dict=None, allow_redundant_or=False, fed_sources=None, shape_names_dict=None,
18+
frequency_property=FREQ_PROP, comments_to_annotations=False):
1919
super().__init__(class_counts_dict, class_profile_dict, class_profile_json_file, remove_empty_shapes,
2020
original_target_classes, original_shape_map, discard_useless_constraints_with_positive_closure,
2121
keep_less_specific, all_compliant_mode, instantiation_property, disable_or_statements,
2222
disable_comments, namespaces_dict, tolerance_to_keep_similar_rules, allow_opt_cardinality,
2323
disable_exact_cardinality, shapes_namespace, inverse_paths, decimals, instances_report_mode,
24-
detect_minimal_iri, class_min_iris_dict, allow_redundant_or, shape_names_dict)
24+
detect_minimal_iri, class_min_iris_dict, allow_redundant_or, shape_names_dict,
25+
frequency_property, comments_to_annotations)
2526
self._fed_sources = fed_sources
2627

2728
def shex_classes(self, acceptance_threshold=0,

shexer/core/shexing/strategy/abstract_shexing_strategy.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ def __init__(self, class_shexer):
3636
self._statement_serializer_factory = StSerializerFactory(freq_mode=class_shexer._instances_report_mode,
3737
decimals=class_shexer._decimals,
3838
instantiation_property_str=self._instantiation_property_str,
39-
disable_comments=self._disable_comments)
39+
disable_comments=self._disable_comments,
40+
frequency_property=self._class_shexer._frequency_property,
41+
namespaces_dict=self._namespaces_dict,
42+
comments_to_annotations=class_shexer._comments_to_annotations)
4043

4144

4245
def yield_base_shapes(self, acceptance_threshold):
@@ -99,7 +102,7 @@ def _change_statement_cardinality_to_all_compliant(self, statement):
99102
statement.cardinality = OPT_CARDINALITY if \
100103
self._allow_opt_cardinality and statement.cardinality == 1 \
101104
else KLEENE_CLOSURE
102-
statement.probability = 1
105+
# statement.probability = 1
103106

104107

105108
@staticmethod

shexer/io/shex/formater/statement_serializers/base_statement_serializer.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,46 @@
33
from shexer.model.const_elem_types import IRI_ELEM_TYPE, BNODE_ELEM_TYPE, NONLITERAL_ELEM_TYPE
44
from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
55
from shexer.utils.shapes import prefixize_shape_name_if_possible
6+
from shexer.utils.uri import prefixize_uri_if_possible
7+
from shexer.consts import FREQ_PROP
68

79
_INVERSE_SENSE_SHEXC = "^"
10+
_ANNOTATION_BEGIN = "//"
11+
_FREQUENCY_PATTERN = "{:.3f}"
12+
813

914
class BaseStatementSerializer(object):
1015

11-
def __init__(self, instantiation_property_str, frequency_serializer, disable_comments=False, is_inverse=False):
16+
def __init__(self, instantiation_property_str, frequency_serializer, disable_comments=False, is_inverse=False,
17+
frequency_property=FREQ_PROP,
18+
namespaces_dict=None,
19+
comments_to_annotations=False):
1220
self._instantiation_property_str = instantiation_property_str
1321
self._disable_comments = disable_comments
1422
self._is_inverse = is_inverse
1523
self._frequency_serializer = frequency_serializer
24+
self._frequency_property = frequency_property
25+
self._namespaces_dict=namespaces_dict
26+
self._comments_to_annotations = comments_to_annotations
1627

17-
def serialize_statement_with_indent_level(self, a_statement, is_last_statement_of_shape, namespaces_dict):
28+
def serialize_statement_with_indent_level(self, a_statement, is_last_statement_of_shape):
1829
tuples_line_indent = []
19-
st_property = BaseStatementSerializer.tune_token(a_statement.st_property, namespaces_dict)
30+
st_property = BaseStatementSerializer.tune_token(a_statement.st_property, self._namespaces_dict)
2031
st_target_element = self.str_of_target_element(target_element=a_statement.st_type,
21-
st_property=a_statement.st_property,
22-
namespaces_dict=namespaces_dict)
32+
st_property=a_statement.st_property)
2333
cardinality = BaseStatementSerializer.cardinality_representation(
2434
statement=a_statement,
2535
out_of_comment=True)
26-
result = self._sense_flag() + st_property + SPACES_GAP_BETWEEN_TOKENS + st_target_element + SPACES_GAP_BETWEEN_TOKENS + \
27-
cardinality + BaseStatementSerializer.closure_of_statement(is_last_statement_of_shape)
28-
36+
if self._comments_to_annotations:
37+
annotations = self._build_constraint_annotations(a_statement)
38+
result = self._sense_flag() + st_property + SPACES_GAP_BETWEEN_TOKENS + st_target_element + SPACES_GAP_BETWEEN_TOKENS + \
39+
cardinality + \
40+
SPACES_GAP_BETWEEN_TOKENS + annotations + SPACES_GAP_BETWEEN_TOKENS + \
41+
BaseStatementSerializer.closure_of_statement(is_last_statement_of_shape)
42+
else:
43+
result = self._sense_flag() + st_property + SPACES_GAP_BETWEEN_TOKENS + st_target_element + SPACES_GAP_BETWEEN_TOKENS + \
44+
cardinality + \
45+
BaseStatementSerializer.closure_of_statement(is_last_statement_of_shape)
2946
if a_statement.cardinality not in [KLEENE_CLOSURE, OPT_CARDINALITY] and not self._disable_comments:
3047
result += BaseStatementSerializer.adequate_amount_of_final_spaces(result)
3148
result += a_statement.probability_representation()
@@ -36,17 +53,26 @@ def serialize_statement_with_indent_level(self, a_statement, is_last_statement_o
3653

3754
return tuples_line_indent
3855

39-
def str_of_target_element(self, target_element, st_property, namespaces_dict):
56+
def _build_constraint_annotations(self, a_statement):
57+
return SPACES_GAP_BETWEEN_TOKENS.join((_ANNOTATION_BEGIN,
58+
prefixize_uri_if_possible(target_uri=self._frequency_property,
59+
namespaces_prefix_dict=self._namespaces_dict,
60+
corners=False),
61+
self._format_frequency(a_statement.probability)
62+
))
63+
64+
def _format_frequency(self, frequency_raw_number):
65+
return _FREQUENCY_PATTERN.format(frequency_raw_number)
66+
def str_of_target_element(self, target_element, st_property):
4067
"""
4168
Special treatment for instantiation_property. We build a value set with a specific URI
4269
:param target_element:
4370
:param st_property:
44-
:param namespaces_dict:
4571
:return:
4672
"""
4773
if st_property == self._instantiation_property_str:
48-
return "[" + BaseStatementSerializer.tune_token(target_element, namespaces_dict) + "]"
49-
return BaseStatementSerializer.tune_token(target_element, namespaces_dict)
74+
return "[" + BaseStatementSerializer.tune_token(target_element, self._namespaces_dict) + "]"
75+
return BaseStatementSerializer.tune_token(target_element, self._namespaces_dict)
5076

5177
@staticmethod
5278
def tune_token(a_token, namespaces_dict):

shexer/io/shex/formater/statement_serializers/fixed_prop_choice_statement_serializer.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
11
from shexer.io.shex.formater.statement_serializers.base_statement_serializer import BaseStatementSerializer
22
from shexer.io.shex.formater.consts import SPACES_GAP_BETWEEN_TOKENS, KLEENE_CLOSURE, OPT_CARDINALITY
3+
from shexer.consts import FREQ_PROP
34

45

56
class FixedPropChoiceStatementSerializer(BaseStatementSerializer):
67

7-
def __init__(self, instantiation_property_str, frequency_serializer, disable_comments=False, is_inverse=False):
8+
def __init__(self, instantiation_property_str, frequency_serializer, disable_comments=False, is_inverse=False,
9+
frequency_property=FREQ_PROP, namespaces_dict=None, comments_to_annotations=False):
810
super(FixedPropChoiceStatementSerializer, self).__init__(instantiation_property_str=instantiation_property_str,
911
disable_comments=disable_comments,
1012
is_inverse=is_inverse,
11-
frequency_serializer=frequency_serializer)
13+
frequency_serializer=frequency_serializer,
14+
frequency_property=frequency_property,
15+
namespaces_dict=namespaces_dict,
16+
comments_to_annotations=comments_to_annotations)
1217

13-
def serialize_statement_with_indent_level(self, a_statement, is_last_statement_of_shape, namespaces_dict):
18+
def serialize_statement_with_indent_level(self, a_statement, is_last_statement_of_shape):
1419
tuples_line_indent = []
15-
st_property = BaseStatementSerializer.tune_token(a_statement.st_property, namespaces_dict)
20+
st_property = BaseStatementSerializer.tune_token(a_statement.st_property, self._namespaces_dict)
1621
st_target_elements = []
1722
for a_type in a_statement.st_types:
1823
st_target_elements.append(self.str_of_target_element(target_element=a_type,
19-
st_property=a_statement.st_property,
20-
namespaces_dict=namespaces_dict))
24+
st_property=a_statement.st_property))
2125

2226
content_line = st_property + SPACES_GAP_BETWEEN_TOKENS
2327
content_line += (SPACES_GAP_BETWEEN_TOKENS + "OR" + SPACES_GAP_BETWEEN_TOKENS).join(st_target_elements)

shexer/io/shex/formater/statement_serializers/st_serializers_factory.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,42 @@ class StSerializerFactory(object):
1414
1515
"""
1616

17-
def __init__(self, freq_mode, decimals, instantiation_property_str, disable_comments):
17+
def __init__(self, freq_mode, decimals, instantiation_property_str, disable_comments, frequency_property, namespaces_dict, comments_to_annotations):
1818
self._freq_serializer = self._build_freq_serializer(freq_mode=freq_mode,
1919
decimals=decimals)
2020

2121
self._direct_base = BaseStatementSerializer(
2222
instantiation_property_str=instantiation_property_str,
2323
disable_comments=disable_comments,
2424
is_inverse=False,
25-
frequency_serializer=self._freq_serializer)
25+
frequency_serializer=self._freq_serializer,
26+
frequency_property=frequency_property,
27+
namespaces_dict=namespaces_dict,
28+
comments_to_annotations=comments_to_annotations)
2629
self._inverse_base = BaseStatementSerializer(
2730
instantiation_property_str=instantiation_property_str,
2831
disable_comments=disable_comments,
2932
is_inverse=True,
30-
frequency_serializer=self._freq_serializer)
33+
frequency_serializer=self._freq_serializer,
34+
frequency_property=frequency_property,
35+
namespaces_dict=namespaces_dict,
36+
comments_to_annotations=comments_to_annotations)
3137
self._direct_choice = FixedPropChoiceStatementSerializer(
3238
instantiation_property_str=instantiation_property_str,
3339
disable_comments=disable_comments,
3440
is_inverse=False,
35-
frequency_serializer=self._freq_serializer)
41+
frequency_serializer=self._freq_serializer,
42+
frequency_property=frequency_property,
43+
namespaces_dict=namespaces_dict,
44+
comments_to_annotations=comments_to_annotations)
3645
self._inverse_choice = FixedPropChoiceStatementSerializer(
3746
instantiation_property_str=instantiation_property_str,
3847
disable_comments=disable_comments,
3948
is_inverse=True,
40-
frequency_serializer=self._freq_serializer)
49+
frequency_serializer=self._freq_serializer,
50+
frequency_property=frequency_property,
51+
namespaces_dict=namespaces_dict,
52+
comments_to_annotations=comments_to_annotations)
4153

4254
def get_base_serializer(self, is_inverse):
4355
return self._direct_base if not is_inverse else self._inverse_base

shexer/model/statement.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ def __init__(self, st_property, st_type, cardinality, n_occurences,
1818
def get_tuples_to_serialize_line_indent_level(self, is_last_statement_of_shape, namespaces_dict):
1919
return self._serializer_object.\
2020
serialize_statement_with_indent_level(a_statement=self,
21-
is_last_statement_of_shape= is_last_statement_of_shape,
22-
namespaces_dict=namespaces_dict)
21+
is_last_statement_of_shape= is_last_statement_of_shape)
2322

2423
def probability_representation(self):
2524
return self._serializer_object.probability_representation(self)

shexer/shaper.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from shexer.consts import SHEXC, SHACL_TURTLE, NT, TSV_SPO, N3, TURTLE, TURTLE_ITER, \
44
RDF_XML, FIXED_SHAPE_MAP, JSON_LD, RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, ZIP, GZ, XZ, \
5-
ALL_EXAMPLES, CONSTRAINT_EXAMPLES, SHAPE_EXAMPLES
5+
ALL_EXAMPLES, CONSTRAINT_EXAMPLES, SHAPE_EXAMPLES, FREQ_PROP
66
from shexer.utils.factories.class_profiler_factory import get_class_profiler
77
from shexer.utils.factories.instance_tracker_factory import get_instance_tracker
88
from shexer.utils.factories.class_shexer_factory import get_class_shexer
@@ -64,7 +64,9 @@ def __init__(self, target_classes=None,
6464
allow_redundant_or=False,
6565
instances_cap=-1,
6666
examples_mode=None,
67-
federated_sources=None # could be a list
67+
federated_sources=None, # could be a list
68+
comments_to_annotations=False,
69+
frequency_property=FREQ_PROP
6870
):
6971
"""
7072
@@ -197,6 +199,9 @@ def __init__(self, target_classes=None,
197199
self._namespaces_for_qualifier_props = namespaces_for_qualifier_props
198200
self._shapes_namespace = shapes_namespace
199201

202+
self._comments_to_annotations=comments_to_annotations
203+
self._frequency_property=frequency_property
204+
200205
#The following two atts are used for optimizations
201206
self._built_remote_graph = get_remote_graph_if_needed(endpoint_url=url_endpoint,
202207
store_locally=not disable_endpoint_cache)
@@ -356,7 +361,9 @@ def _build_class_shexer(self):
356361
class_min_iris=self._class_min_iris,
357362
allow_redundant_or=self._allow_redundant_or,
358363
federated_sources=self._federated_sources,
359-
shape_names=self._shape_names)
364+
shape_names=self._shape_names,
365+
frequency_property=self._frequency_property,
366+
comments_to_annotations=self._comments_to_annotations)
360367

361368
def _build_shapes_serializer(self, target_file, string_return, output_format, rdfconfig_directory, verbose):
362369
return get_shape_serializer(shapes_list=self._shape_list,

0 commit comments

Comments
 (0)