Skip to content

Commit 8185d91

Browse files
Merge branch 'develop'
2 parents 7f7da23 + 4409266 commit 8185d91

File tree

4 files changed

+73
-28
lines changed

4 files changed

+73
-28
lines changed

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ def read(file_path):
88
setup(
99
name = 'shexer',
1010
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
11-
version = '2.6.1',
11+
version = '2.6.2',
1212
description = 'Automatic schema extraction for RDF graphs',
1313
author = 'Daniel Fernandez-Alvarez',
1414
author_email = 'danifdezalvarez@gmail.com',
1515
url = 'https://github.com/DaniFdezAlvarez/shexer',
16-
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.1.tar.gz',
16+
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.2.tar.gz',
1717
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
1818
long_description = read('README.md'),
1919
long_description_content_type='text/markdown',

shexer/io/rdfconfig/formater/rdfconfig_serializer.py

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from shexer.core.profiling.class_profiler import RDF_TYPE_STR
22
from shexer.utils.file import load_whole_file_content
3-
from shexer.utils.uri import prefixize_uri_if_possible
3+
from shexer.utils.uri import prefixize_uri_if_possible, get_prefix_of_namespace_if_it_exists
4+
from shexer.utils.shapes import build_shapes_name_for_class_uri
45
from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
56
import os
67

@@ -55,31 +56,65 @@ def _serialize_shape(self, shape):
5556
self._serialize_constraint(shape=shape,
5657
constraint=a_constraint)
5758

58-
59-
def _variable_property_name(self, st_property):
60-
original = self._create_var_name_for_property(st_property) # TODO improve
61-
candidate = original
62-
counter = 1
63-
while candidate in self._variables_used:
64-
counter +=1
65-
candidate = original + str(counter)
59+
def _variable_property_name(self, st_property, st_type, shape_uri):
60+
candidate = self._create_var_name_for_property(st_property, shape_uri) # TODO improve
61+
if candidate in self._variables_used:
62+
candidate += self._add_type_tag_to_var_name(st_type)
63+
tmp = candidate
64+
counter = 1
65+
while candidate in self._variables_used:
66+
candidate = tmp + str(counter)
67+
counter += 1
6668
self._variables_used.add(candidate)
6769
return candidate
6870

69-
def _create_var_name_for_property(self, st_property):
70-
st_property.replace("_","")
71-
st_property=st_property.lower()
72-
for i in range(len(st_property) - 1, -1, -1):
73-
if not st_property[i].isalnum():
74-
return st_property[i+1:]
75-
return "var_name"
71+
def _add_type_tag_to_var_name(self, st_type):
72+
st_type.replace("_", "")
73+
st_property = st_type.lower()
74+
candidate = ""
75+
for i in range(len(st_property) - 1, -1, -1): # Locate suffix
76+
if st_property[i] in ["/", "#"]:
77+
return st_property[i + 1:]
78+
return candidate
7679

80+
def _create_var_name_for_property(self, st_property, shape_uri):
81+
st_property.replace("_", "")
82+
st_property = st_property.lower()
83+
candidate = ""
84+
for i in range(len(st_property) - 1, -1, -1): # Locate suffix
85+
if st_property[i] in ["/", "#"]:
86+
candidate = st_property[i + 1:]
87+
break
88+
if candidate != "": # Try to locate prefixed namespace
89+
prefix = get_prefix_of_namespace_if_it_exists(target_uri=st_property,
90+
namespaces_prefix_dict=self._namespaces_dict,
91+
corners=False)
92+
if prefix is not None:
93+
candidate = prefix + "_" + candidate
94+
else: # if there is no suffix, get letters and numbers
95+
for char in st_property:
96+
if not char.isalnum():
97+
candidate += char
98+
shape_tag = self._shape_tag_for_var_name(shape_uri)
99+
return f"{candidate}_of_{shape_tag}"
100+
101+
# return "var_name"
102+
def _shape_tag_for_var_name(self, class_uri):
103+
last_piece = class_uri
104+
if "#" in last_piece and last_piece[-1] != "#":
105+
last_piece = last_piece[last_piece.rfind("#") + 1:]
106+
if "/" in last_piece:
107+
if last_piece[-1] != "/":
108+
last_piece = last_piece[last_piece.rfind("/") + 1:]
109+
else:
110+
last_piece = last_piece[last_piece[:-1].rfind("/") + 1:]
111+
return last_piece
77112
def _create_subject_name_for_shape(self, shape_uri):
78-
shape_uri.replace("_","")
113+
shape_uri.replace("_", "")
79114
shape_uri.replace("-", "")
80115
for i in range(len(shape_uri) - 1, -1, -1):
81116
if not shape_uri[i].isalnum():
82-
return "Shape" + shape_uri[i+1:].capitalize()
117+
return "Shape" + shape_uri[i + 1:].capitalize()
83118
return "SubjectName"
84119

85120
def _shape_subject_name(self, shape_uri):
@@ -94,7 +129,6 @@ def _shape_subject_name(self, shape_uri):
94129
self._subjects_dict[shape_uri] = candidate
95130
return self._subjects_dict[shape_uri]
96131

97-
98132
def _write_shape_line(self, content, indentation):
99133
indentation_str = ' ' * 2 * indentation
100134
self._shapes_stream.write(f"{indentation_str}- {content}\n")
@@ -136,10 +170,18 @@ def _serialize_constraint(self, shape, constraint):
136170
example_cons = example_cons[:example_cons.find("@")]
137171
elif not example_cons.startswith('"'):
138172
example_cons = f'"{example_cons}"'
173+
if len(example_cons) >= 2:
174+
example_cons = example_cons[0] + example_cons[1:-1].replace('"', '\\"') + example_cons[-1]
139175
self._write_shape_line(indentation=_PROPERTY_INDENT_LEVEL,
140176
content=f"{st_property}:")
141177
self._write_shape_line(indentation=_CONSTRAINT_INDENT_LEVEL,
142-
content=f"{self._variable_property_name(constraint.st_property)}: {example_cons}")
178+
content="{}: {}".format(self._variable_property_name(
179+
st_property=constraint.st_property,
180+
st_type=constraint.st_type,
181+
shape_uri=shape.class_uri),
182+
example_cons))
183+
184+
143185

144186
def _serialize_prefixes(self):
145187
with open(self._prefixes_file, "w") as out_stream:
@@ -175,4 +217,3 @@ def _nice_uri(self, target_uri):
175217
return prefixize_uri_if_possible(target_uri=result,
176218
namespaces_prefix_dict=self._namespaces_dict,
177219
corners=True)
178-

shexer/utils/shapes.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from shexer.io.shex.formater.consts import SHAPE_LINK_CHAR
44

55
def build_shapes_name_for_class_uri(class_uri, shapes_namespace):
6-
7-
if class_uri.startswith("@"): # special shape case
6+
if class_uri.startswith(SHAPE_LINK_CHAR): # special shape case
87
return class_uri
98
if class_uri.startswith("<") and class_uri.endswith(">"):
109
return STARTING_CHAR_FOR_SHAPE_NAME + class_uri
@@ -21,7 +20,7 @@ def build_shapes_name_for_class_uri(class_uri, shapes_namespace):
2120
if last_piece.startswith("<"):
2221
last_piece = last_piece[1:]
2322
return STARTING_CHAR_FOR_SHAPE_NAME + "<" + shapes_namespace + last_piece + ">" if last_piece is not None else class_uri
24-
# return class_uri
23+
2524

2625

2726
def build_shape_name_for_qualifier_prop_uri(prop_uri, shapes_namespace): # TODO REVIEW!

shexer/utils/uri.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,12 @@ def prefixize_uri_if_possible(target_uri, namespaces_prefix_dict, corners=True):
155155
return target_uri if best_match is None else candidate_uri.replace(best_match, namespaces_prefix_dict[best_match] + ":")
156156

157157

158-
159-
158+
def get_prefix_of_namespace_if_it_exists(target_uri, namespaces_prefix_dict, corners=True):
    """Return the prefix of the namespace that directly contains a URI.

    A namespace matches when the URI starts with it and the rest of the URI
    contains neither ``/`` nor ``#``. Namespaces are tried in the iteration
    order of ``namespaces_prefix_dict`` and the first match wins.

    :param target_uri: URI to look up
    :param namespaces_prefix_dict: mapping from namespace to prefix
    :param corners: when true, ``target_uri`` is passed through
        ``remove_corners`` before matching
    :return: the prefix of the first matching namespace, or ``None``
    """
    uri = target_uri if not corners else remove_corners(target_uri)
    for namespace, prefix in namespaces_prefix_dict.items():
        if not uri.startswith(namespace):
            continue
        local_part = uri[len(namespace):]
        if not ("/" in local_part or "#" in local_part):
            return prefix
    return None
160165

161166

0 commit comments

Comments
 (0)