Skip to content

Commit b04627c

Browse files
Merge branch 'develop'
2 parents 63357a3 + 48edefd commit b04627c

23 files changed

+232
-104
lines changed

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,12 @@ def read(file_path):
88
setup(
99
name = 'shexer',
1010
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
11-
version = '2.6.4',
11+
version = '2.6.5',
1212
description = 'Automatic schema extraction for RDF graphs',
1313
author = 'Daniel Fernandez-Alvarez',
1414
author_email = 'danifdezalvarez@gmail.com',
1515
url = 'https://github.com/DaniFdezAlvarez/shexer',
16-
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.4.tar.gz',
16+
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.5.tar.gz',
1717
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
1818
long_description = read('README.md'),
1919
long_description_content_type='text/markdown',

shexer/core/profiling/class_profiler.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class ClassProfiler(object):
2121
def __init__(self, triples_yielder, instances_dict, instantiation_property_str=RDF_TYPE_STR,
2222
remove_empty_shapes=True, original_target_classes=None, original_shape_map=None,
2323
shapes_namespace=SHAPES_DEFAULT_NAMESPACE, inverse_paths=False, detect_minimal_iri=False,
24-
examples_mode=None):
24+
examples_mode=None, namespaces_dict=None):
2525
self._triples_yielder = triples_yielder
2626
self._instances_dict = instances_dict # TODO refactor: change name once working again
2727
# self._instances_shape_dict = {}
@@ -36,11 +36,10 @@ def __init__(self, triples_yielder, instances_dict, instantiation_property_str=R
3636
self._detect_minimal_iri = detect_minimal_iri
3737
self._examples_mode = examples_mode
3838
self._inverse_paths = inverse_paths
39+
self._namespaces_dict = namespaces_dict if namespaces_dict is not None else {}
40+
self._original_shape_map = original_shape_map
3941

40-
self._original_target_nodes = determine_original_target_nodes_if_needed(remove_empty_shapes=remove_empty_shapes,
41-
original_target_classes=original_target_classes,
42-
original_shape_map=original_shape_map,
43-
shapes_namespace=shapes_namespace)
42+
self._original_target_nodes = None # Will be filled during execution
4443

4544
if detect_minimal_iri or examples_mode is not None:
4645
self._shape_feature_examples = ShapeExampleFeaturesDict(track_inverse_features=inverse_paths)
@@ -76,7 +75,8 @@ def profile_classes(self, verbose):
7675
log_msg(verbose=verbose,
7776
msg="Mimimal IRIs detected...")
7877
return self._classes_shape_dict, self._class_counts, \
79-
self._shape_feature_examples if (self._detect_minimal_iri or self._examples_mode is not None) else None
78+
self._shape_feature_examples if (self._detect_minimal_iri or self._examples_mode is not None) else None,\
79+
self._shape_names_dict
8080

8181
def get_target_classes_dict(self):
8282
return self._instances_dict
@@ -169,6 +169,10 @@ def _build_class_profile(self):
169169
def _clean_class_profile(self):
170170
if not self._remove_empty_shapes:
171171
return
172+
self._original_target_nodes = determine_original_target_nodes_if_needed(remove_empty_shapes=self._remove_empty_shapes,
173+
original_target_classes=self._original_raw_target_classes,
174+
original_shape_map=self._original_shape_map,
175+
shape_names_dict=self._shape_names_dict)
172176
shapes_to_remove = self._detect_shapes_to_remove()
173177

174178
while len(shapes_to_remove) != 0:

shexer/core/profiling/federated_source_class_profiler.py

Lines changed: 2 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,66 +12,14 @@ class FederatedSourceClassProfiler(ClassProfiler):
1212
def __init__(self, triples_yielder, instances_dict, instantiation_property_str=RDF_TYPE_STR,
1313
remove_empty_shapes=True, original_target_classes=None, original_shape_map=None,
1414
shapes_namespace=SHAPES_DEFAULT_NAMESPACE, inverse_paths=False, detect_minimal_iri=False,
15-
examples_mode=None, list_of_federated_objects=None):
15+
examples_mode=None, list_of_federated_objects=None, namespaces_dict=None):
1616
super().__init__(triples_yielder, instances_dict, instantiation_property_str, remove_empty_shapes,
1717
original_target_classes, original_shape_map, shapes_namespace, inverse_paths,
18-
detect_minimal_iri, examples_mode)
18+
detect_minimal_iri, examples_mode, namespaces_dict)
1919

2020
self._list_of_federated_objects = list_of_federated_objects
2121

2222

23-
# TODO CHECK ENTIRE PRIFILING PROCESS. maybe there is only need to redefine the yield_triples as it's already done.
24-
# def profile_classes(self, verbose):
25-
# log_msg(verbose=verbose,
26-
# msg="Starting class profiler...")
27-
# self._init_class_counts_and_shape_dict()
28-
# log_msg(verbose=verbose,
29-
# msg="Instance counts completed. Annotating instance features...")
30-
# self._adapt_instances_dict()
31-
# self._build_shape_of_instances()
32-
# log_msg(verbose=verbose,
33-
# msg="Instance features annotated. Number of relevant triples computed: {}. "
34-
# "Building shape profiles...".format(self._relevant_triples))
35-
#
36-
# self._build_class_profile()
37-
# log_msg(verbose=verbose,
38-
# msg="Draft shape profiles built. Cleaning shape profiles...")
39-
# self._clean_class_profile()
40-
# log_msg(verbose=verbose,
41-
# msg="Shape profiles done. Working with {} shapes.".format(len(self._classes_shape_dict)))
42-
# if self._detect_minimal_iri or self._examples_mode in [SHAPE_EXAMPLES, ALL_EXAMPLES]:
43-
# log_msg(verbose=verbose,
44-
# msg="Detecting example features for each shape...")
45-
# self._init_anotation_example_method()
46-
# self._detect_example_features()
47-
# log_msg(verbose=verbose,
48-
# msg="Mimimal IRIs detected...")
49-
# return self._classes_shape_dict, self._class_counts, \
50-
# self._shape_feature_examples if (self._detect_minimal_iri or self._examples_mode is not None) else None
51-
52-
53-
54-
55-
# def _complete_class_counts_with_fed_sources(self):
56-
# # There whould not be any new class, shape, as they are all mentioned in the base dict.
57-
# # All we have to do here is to add class counts wlaking the fed_instance_dicts
58-
# for a_fed_source in self._list_of_federated_objects:
59-
# self._complete_class_count_of_fed_source(a_fed_source)
60-
#
61-
# def _complete_class_count_of_fed_source(self, federated_source_obj):
62-
# for an_instance, class_list in federated_source_obj.instances_dict.items():
63-
# for a_class in class_list:
64-
# # if a_class not in self._c_shapes_dict:
65-
# # self._c_shapes_dict[a_class] = {}
66-
# # self._c_counts[a_class] = 0
67-
# self._c_counts[a_class] += 1
68-
#
69-
# def _complete_instance_dict_adaptation(self):
70-
# for a_fed_source in self._list_of_federated_objects:
71-
# if not self._inverse_paths:
72-
# self._adapt_i_dict_direct_of_a_
73-
74-
7523
def _yield_relevant_triples(self):
7624
for a_triple in self._triples_yielder.yield_triples():
7725
if self._strategy.is_a_relevant_triple(a_triple):

shexer/core/profiling/strategy/abstract_feature_direction_strategy.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from shexer.utils.shapes import build_shapes_name_for_class_uri
2-
from shexer.core.profiling.consts import POS_CLASSES, _S, _P, _O, POS_FEATURES_DIRECT, _ONE_TO_MANY, POS_FEATURES_INVERSE
2+
from shexer.core.profiling.consts import POS_CLASSES, _S, _P, _O, POS_FEATURES_DIRECT, _ONE_TO_MANY
33
from shexer.model.IRI import IRI_ELEM_TYPE, IRI
44
from shexer.model.bnode import BNode, BNODE_ELEM_TYPE
5+
from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
56

67
class AbstractFeatureDirectionStrategy(object):
78

@@ -14,6 +15,7 @@ def __init__(self, class_profiler):
1415
self._original_raw_target_classes = self._class_profiler._original_raw_target_classes
1516
self._detect_minimal_iri = self._class_profiler._detect_minimal_iri
1617
self._examples_mode = self._class_profiler._examples_mode
18+
self._namespaces_dict = self._class_profiler._namespaces_dict
1719
if self._detect_minimal_iri or self._examples_mode is not None:
1820
self._shape_feature_examples = self._class_profiler._shape_feature_examples
1921

@@ -48,6 +50,12 @@ def _init_annotated_direct_features(self):
4850
self._c_shapes_dict[a_class] = {}
4951
self._c_counts[a_class] = 0
5052
self._c_counts[a_class] += 1
53+
if a_class not in self._shape_names_dict:
54+
self._shape_names_dict[a_class] = \
55+
build_shapes_name_for_class_uri(class_uri=a_class,
56+
shapes_namespace=self._class_profiler._shapes_namespace,
57+
shape_names_dict=self._shape_names_dict,
58+
namespace_prefix_dict=self._namespaces_dict)
5159

5260
def _annotate_direct_instance_features(self, an_instance):
5361
direct_feautres_3tuple = self._infer_direct_3tuple_features(an_instance)
@@ -67,7 +75,7 @@ def _infer_direct_3tuple_features(self, an_instance):
6775

6876
def _infer_valid_cardinalities(self, a_property, a_cardinality):
6977
"""
70-
Special teratment for self._instantiation_property_str. If thats the property, we are targetting specific URIs
78+
Special treatment for self._instantiation_property_str. If that's the property, we are targeting specific URIs
7179
instead of the type IRI.
7280
Cardinality will be always "1"
7381
:param a_property:
@@ -119,15 +127,19 @@ def _decide_shapes_elem(self, str_elem):
119127
for a_class in self._i_dict[str_elem][POS_CLASSES]]
120128

121129
def _get_shape_name_for_a_class(self, a_class):
122-
self._assign_shape_name_if_needed(a_class)
123-
return self._shape_names_dict[a_class]
124-
125-
def _assign_shape_name_if_needed(self, a_class):
126-
if a_class in self._shape_names_dict:
127-
return
128-
self._shape_names_dict[a_class] = \
129-
build_shapes_name_for_class_uri(class_uri=a_class,
130-
shapes_namespace=self._class_profiler._shapes_namespace)
130+
# self._assign_shape_name_if_needed(a_class)
131+
if a_class.startswith("<"):
132+
return self._shape_names_dict[a_class]
133+
return f"{STARTING_CHAR_FOR_SHAPE_NAME}<{self._shape_names_dict[a_class]}>"
134+
135+
# def _assign_shape_name_if_needed(self, a_class):
136+
# if a_class in self._shape_names_dict:
137+
# return
138+
# self._shape_names_dict[a_class] = \
139+
# build_shapes_name_for_class_uri(class_uri=a_class,
140+
# shapes_namespace=self._class_profiler._shapes_namespace,
141+
# shape_names_dict=self._shape_names_dict,
142+
# namespace_prefix_dict=self._namespaces_dict)
131143

132144
def _annotate_target_subject(self, a_triple):
133145
str_subj = a_triple[_S].iri

shexer/core/profiling/strategy/direct_features_strategy.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1+
from shexer.utils.shapes import build_shapes_name_for_class_uri
22
from shexer.core.profiling.strategy.abstract_feature_direction_strategy import AbstractFeatureDirectionStrategy
33
from shexer.core.profiling.consts import _S, _P, _O, POS_CLASSES
44

@@ -35,6 +35,11 @@ def init_original_targets(self):
3535
for a_class in self._original_raw_target_classes:
3636
self._c_shapes_dict[a_class] = {}
3737
self._c_counts[a_class] = 0
38+
self._shape_names_dict[a_class] = \
39+
build_shapes_name_for_class_uri(class_uri=a_class,
40+
shapes_namespace=self._class_profiler._shapes_namespace,
41+
shape_names_dict=self._shape_names_dict,
42+
namespace_prefix_dict=self._namespaces_dict)
3843

3944
def has_shape_annotated_features(self, shape_label):
4045
if shape_label not in self._c_shapes_dict:

shexer/core/profiling/strategy/include_reverse_features_strategy.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from shexer.utils.shapes import build_shapes_name_for_class_uri
12
from shexer.core.profiling.strategy.abstract_feature_direction_strategy import AbstractFeatureDirectionStrategy
23
from shexer.core.profiling.consts import _S, _P, _O, POS_FEATURES_INVERSE, POS_CLASSES
34
from shexer.model.IRI import IRI_ELEM_TYPE
@@ -39,6 +40,12 @@ def init_annotated_targets(self):
3940
self._c_shapes_dict[a_class] = ({}, {})
4041
self._c_counts[a_class] = 0
4142
self._c_counts[a_class] += 1
43+
if a_class not in self._shape_names_dict:
44+
self._shape_names_dict[a_class] = \
45+
build_shapes_name_for_class_uri(class_uri=a_class,
46+
shapes_namespace=self._class_profiler._shapes_namespace,
47+
shape_names_dict=self._shape_names_dict,
48+
namespace_prefix_dict=self._namespaces_dict)
4249

4350
def init_original_targets(self):
4451
if self._original_raw_target_classes:

shexer/core/shexing/class_shexer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
1818
allow_opt_cardinality=True, disable_exact_cardinality=False,
1919
shapes_namespace=SHAPES_DEFAULT_NAMESPACE, inverse_paths=False,
2020
decimals=-1, instances_report_mode=RATIO_INSTANCES, detect_minimal_iri=False,
21-
class_min_iris_dict=None, allow_redundant_or=False):
21+
class_min_iris_dict=None, allow_redundant_or=False, shape_names_dict=None):
2222
self._class_counts_dict = class_counts_dict
2323
self._class_profile_dict = class_profile_dict if class_profile_dict is not None else self._load_class_profile_dict_from_file(
2424
class_profile_json_file)
@@ -41,11 +41,12 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
4141
self._instances_report_mode = instances_report_mode
4242
self._detect_minimal_iri = detect_minimal_iri
4343
self._allow_redundant_or = allow_redundant_or
44+
self._shape_names_dict = shape_names_dict if shape_names_dict is not None else {}
4445

4546
self._original_target_nodes = determine_original_target_nodes_if_needed(remove_empty_shapes=remove_empty_shapes,
4647
original_target_classes=original_target_classes,
4748
original_shape_map=original_shape_map,
48-
shapes_namespace=shapes_namespace)
49+
shape_names_dict=shape_names_dict)
4950
self._strategy = DirectShexingStrategy(self) if not inverse_paths \
5051
else DirectAndInverseShexingStrategy(self)
5152

shexer/core/shexing/class_shexer_fed_sources.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
1515
disable_comments=False, namespaces_dict=None, tolerance_to_keep_similar_rules=0,
1616
allow_opt_cardinality=True, disable_exact_cardinality=False, shapes_namespace=SHAPES_DEFAULT_NAMESPACE,
1717
inverse_paths=False, decimals=-1, instances_report_mode=RATIO_INSTANCES, detect_minimal_iri=False,
18-
class_min_iris_dict=None, allow_redundant_or=False, fed_sources=None):
18+
class_min_iris_dict=None, allow_redundant_or=False, fed_sources=None, shape_names_dict=None):
1919
super().__init__(class_counts_dict, class_profile_dict, class_profile_json_file, remove_empty_shapes,
2020
original_target_classes, original_shape_map, discard_useless_constraints_with_positive_closure,
2121
keep_less_specific, all_compliant_mode, instantiation_property, disable_or_statements,
2222
disable_comments, namespaces_dict, tolerance_to_keep_similar_rules, allow_opt_cardinality,
2323
disable_exact_cardinality, shapes_namespace, inverse_paths, decimals, instances_report_mode,
24-
detect_minimal_iri, class_min_iris_dict, allow_redundant_or)
24+
detect_minimal_iri, class_min_iris_dict, allow_redundant_or, shape_names_dict)
2525
self._fed_sources = fed_sources
2626

2727
def shex_classes(self, acceptance_threshold=0,

shexer/core/shexing/strategy/abstract_shexing_strategy.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def __init__(self, class_shexer):
2727
self._all_compliant_mode = self._class_shexer._all_compliant_mode
2828
self._disable_exact_cardinality = self._class_shexer._disable_exact_cardinality
2929
self._allow_redundant_or = self._class_shexer._allow_redundant_or
30+
self._shape_names_dict = self._class_shexer._shape_names_dict
3031

3132
self._strategy_min_iri = AnnotateMinIriStrategy(class_shexer._class_min_iris_dict) \
3233
if class_shexer._detect_minimal_iri \
@@ -267,6 +268,12 @@ def _is_a_literal(self, node_kind_str):
267268
return False
268269
return True
269270

271+
272+
def _shape_name(self, class_key):
273+
if class_key.startswith("<"):
274+
return STARTING_CHAR_FOR_SHAPE_NAME + class_key
275+
return f"{STARTING_CHAR_FOR_SHAPE_NAME}<{self._shape_names_dict[class_key]}>"
276+
270277
class MergeableConstraints(object):
271278
"""
272279
Internal class used to handle constraints created during the voting process that should be merged into a single one.

shexer/core/shexing/strategy/direct_and_inverse_shexing_strategy.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from shexer.core.shexing.strategy.abstract_shexing_strategy import AbstractShexingStrategy
2-
from shexer.utils.shapes import build_shapes_name_for_class_uri
32
from shexer.model.statement import Statement
43
from shexer.model.shape import Shape
54

@@ -14,6 +13,7 @@ def __init__(self, class_shexer):
1413
self._class_profile_dict = self._class_shexer._class_profile_dict
1514
self._shapes_namespace = self._class_shexer._shapes_namespace
1615
self._class_counts_dict = self._class_shexer._class_counts_dict
16+
self._namespaces_dict = self._class_shexer._namespaces_dict
1717

1818
def remove_statements_to_gone_shapes(self, shape, shape_names_to_remove):
1919
shape.direct_statements = self._statements_without_shapes_to_remove(
@@ -25,8 +25,7 @@ def remove_statements_to_gone_shapes(self, shape, shape_names_to_remove):
2525

2626
def _yield_base_shapes_direction_aware(self, acceptance_threshold):
2727
for a_class_key in self._class_profile_dict:
28-
name = build_shapes_name_for_class_uri(class_uri=a_class_key,
29-
shapes_namespace=self._shapes_namespace)
28+
name = self._shape_name(a_class_key)
3029
number_of_instances = float(self._class_counts_dict[a_class_key])
3130

3231
direct_statements = self._build_base_direct_statements(acceptance_threshold, a_class_key,

0 commit comments

Comments
 (0)