Skip to content

Commit b7d700b

Browse files
Add notebook about outputs
1 parent c74632a commit b7d700b

File tree

3 files changed

+345
-2
lines changed

3 files changed

+345
-2
lines changed

doc/outputs.ipynb

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"provenance": []
7+
},
8+
"kernelspec": {
9+
"name": "python3",
10+
"display_name": "Python 3"
11+
},
12+
"language_info": {
13+
"name": "python"
14+
}
15+
},
16+
"cells": [
17+
{
18+
"cell_type": "code",
19+
"execution_count": null,
20+
"metadata": {
21+
"id": "0HH1fi9Iuvmz"
22+
},
23+
"outputs": [],
24+
"source": [
25+
"!pip install shexer"
26+
]
27+
},
28+
{
29+
"cell_type": "markdown",
30+
"source": [
31+
"sheXer can generate different types of outputs:\n",
32+
"* ShEx (in compact syntax)\n",
33+
"* SHACL (in turtle)\n",
34+
"* Statistics (as annotations in ShEx)\n",
35+
"* Examples of conformance (as annotations in ShEx).\n",
36+
"* UML visualizations.\n",
37+
"* RDF-Config files\n",
38+
"* Shapes for federation.\n",
39+
"\n",
40+
"In this notebook, we provide some examples on how to produce them all."
41+
],
42+
"metadata": {
43+
"id": "pEaY9geKvBW9"
44+
}
45+
},
46+
{
47+
"cell_type": "code",
48+
"source": [
49+
"from shexer.shaper import Shaper\n",
50+
"from shexer.consts import TURTLE_ITER, SHACL_TURTLE, SHEXC, SHAPE_EXAMPLES, CONSTRAINT_EXAMPLES, ALL_EXAMPLES, RATIO_INSTANCES, ABSOLUTE_INSTANCES, MIXED_INSTANCES\n",
51+
"import requests\n",
52+
"\n",
53+
"def remote_to_local(url, local_path):\n",
54+
" response = requests.get(url)\n",
55+
" if response.status_code == 200:\n",
56+
" with open(local_path, \"w\", encoding=\"utf-8\") as out_stream:\n",
57+
" out_stream.write(response.text)\n",
58+
"\n",
59+
"INPUT_GRPAH_PATH = \"local_file.ttl\"\n",
60+
"def default_namespaces():\n",
61+
" return {\"http://example.org/\": \"ex\",\n",
62+
" \"http://www.w3.org/XML/1998/namespace/\": \"xml\",\n",
63+
" \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\": \"rdf\",\n",
64+
" \"http://www.w3.org/2000/01/rdf-schema#\": \"rdfs\",\n",
65+
" \"http://www.w3.org/2001/XMLSchema#\": \"xsd\",\n",
66+
" \"http://xmlns.com/foaf/0.1/\": \"foaf\"\n",
67+
" }\n",
68+
"\n",
69+
"remote_to_local(\"https://raw.githubusercontent.com/weso/shexer/refs/heads/master/test/t_files/t_graph_1.ttl\",\n",
70+
" INPUT_GRPAH_PATH)\n",
71+
"\n",
72+
"print(\"# We will work with this graph:\\n\")\n",
73+
"\n",
74+
"with open(INPUT_GRPAH_PATH) as out_stream:\n",
75+
" print(out_stream.read())"
76+
],
77+
"metadata": {
78+
"id": "8E-z512uvA1N"
79+
},
80+
"execution_count": null,
81+
"outputs": []
82+
},
83+
{
84+
"cell_type": "code",
85+
"source": [
86+
"# Generation of SHACL to file\n",
87+
"\n",
88+
"shaper = Shaper(\n",
89+
" graph_file_input=INPUT_GRPAH_PATH,\n",
90+
" namespaces_dict=default_namespaces(),\n",
91+
" all_classes_mode=True,\n",
92+
" input_format=TURTLE_ITER)\n",
93+
"shaper.shex_graph(output_file=\"shacl_shapes.ttl\", # Provide a disk path to save the results with this parameter\n",
94+
" output_format=SHACL_TURTLE) # Set this param with this value to generate SHACL shapes\n",
95+
"\n",
96+
"with open(\"shacl_shapes.ttl\") as in_stream:\n",
97+
" print(in_stream.read()) # Just checking the content of the file generated\n",
98+
"\n"
99+
],
100+
"metadata": {
101+
"id": "nrPGJ8Gvvy9V"
102+
},
103+
"execution_count": null,
104+
"outputs": []
105+
},
106+
{
107+
"cell_type": "code",
108+
"source": [
109+
"# Same thing, but without saving to file. Instead, result returned as string\n",
110+
"\n",
111+
"shaper = Shaper(\n",
112+
" graph_file_input=INPUT_GRPAH_PATH,\n",
113+
" namespaces_dict=default_namespaces(),\n",
114+
" all_classes_mode=True,\n",
115+
" input_format=TURTLE_ITER)\n",
116+
"result = shaper.shex_graph(string_output=True, # If you set this to True, the method will return string\n",
117+
" output_format=SHACL_TURTLE) # you can indicate a file anyway. If you do, the results will be saved to disk regardless of whether they were returned in a string\n",
118+
"\n",
119+
"print(result)\n",
120+
"print(\"---------------------------\")\n",
121+
"with open(\"shacl_shapes.ttl\") as in_stream:\n",
122+
" print(in_stream.read()) # Just checking the content of the file generated"
123+
],
124+
"metadata": {
125+
"id": "54C7obbkx3jT"
126+
},
127+
"execution_count": null,
128+
"outputs": []
129+
},
130+
{
131+
"cell_type": "code",
132+
"source": [
133+
"# Same input graph, but now we generate results in ShEx\n",
134+
"shaper = Shaper(\n",
135+
" graph_file_input=INPUT_GRPAH_PATH,\n",
136+
" namespaces_dict=default_namespaces(),\n",
137+
" all_classes_mode=True,\n",
138+
" input_format=TURTLE_ITER)\n",
139+
"result = shaper.shex_graph(string_output=True,\n",
140+
" output_format=SHEXC) # SHEXC is the default value. If you don't set this parameter, results will be generated in ShEx.\n",
141+
"print(result)\n"
142+
],
143+
"metadata": {
144+
"id": "DG-XesT6yg9j"
145+
},
146+
"execution_count": null,
147+
"outputs": []
148+
},
149+
{
150+
"cell_type": "code",
151+
"source": [
152+
"# Note that ShEx outputs are commented with statistical information.\n",
153+
"# We can disable such information and just get shapes\n",
154+
"\n",
155+
"\n",
156+
"shaper = Shaper(\n",
157+
" graph_file_input=INPUT_GRPAH_PATH,\n",
158+
" namespaces_dict=default_namespaces(),\n",
159+
" all_classes_mode=True,\n",
160+
" disable_comments=True, # Use this to avoid getting comments on shapes.\n",
161+
" input_format=TURTLE_ITER)\n",
162+
"result = shaper.shex_graph(string_output=True)\n",
163+
"print(result)\n"
164+
],
165+
"metadata": {
166+
"id": "35kIg1dMy-WU"
167+
},
168+
"execution_count": null,
169+
"outputs": []
170+
},
171+
{
172+
"cell_type": "code",
173+
"source": [
174+
"# We could also make comments even richer. Shapes can be annotated with examples\n",
175+
"# at constraint level (examples of nodes matching node constraints)\n",
176+
"# or at shape level (examples of nodes matching shapes).\n",
177+
"\n",
178+
"# You can get only shapes\n",
179+
"shaper = Shaper(\n",
180+
" graph_file_input=INPUT_GRPAH_PATH,\n",
181+
" namespaces_dict=default_namespaces(),\n",
182+
" all_classes_mode=True,\n",
183+
" disable_comments=False, # Default value, you could just omit this when it is False\n",
184+
" input_format=TURTLE_ITER,\n",
185+
" examples_mode=SHAPE_EXAMPLES) # Set this parameter to one of the allowed values to get shapes annotated with examples\n",
186+
"result = shaper.shex_graph(string_output=True)\n",
187+
"print(\"_______examples at shape level________\")\n",
188+
"print(result)\n",
189+
"\n",
190+
"# ... only constraints\n",
191+
"shaper = Shaper(\n",
192+
" graph_file_input=INPUT_GRPAH_PATH,\n",
193+
" namespaces_dict=default_namespaces(),\n",
194+
" all_classes_mode=True,\n",
195+
" disable_comments=False, # Default value, you could just omit this when it is False\n",
196+
" input_format=TURTLE_ITER,\n",
197+
" examples_mode=CONSTRAINT_EXAMPLES) # Set this parameter to one of the allowed values to get shapes annotated with examples\n",
198+
"result = shaper.shex_graph(string_output=True)\n",
199+
"print(\"_______examples at constraint level________\")\n",
200+
"print(result)\n",
201+
"\n",
202+
"# ... or both things at a time.\n",
203+
"shaper = Shaper(\n",
204+
" graph_file_input=INPUT_GRPAH_PATH,\n",
205+
" namespaces_dict=default_namespaces(),\n",
206+
" all_classes_mode=True,\n",
207+
" disable_comments=False, # Default value, you could just omit this when it is False\n",
208+
" input_format=TURTLE_ITER,\n",
209+
" examples_mode=ALL_EXAMPLES) # Set this parameter to one of the allowed values to get shapes annotated with examples\n",
210+
"result = shaper.shex_graph(string_output=True)\n",
211+
"print(\"_______examples at ALL level________\")\n",
212+
"print(result)"
213+
],
214+
"metadata": {
215+
"id": "gxVueGRWzRV6"
216+
},
217+
"execution_count": null,
218+
"outputs": []
219+
},
220+
{
221+
"cell_type": "code",
222+
"source": [
223+
"# You can also tune if you want absolute, relative or both types for the generated stats\n",
224+
"\n",
225+
"# Relative\n",
226+
"shaper = Shaper(\n",
227+
" graph_file_input=INPUT_GRPAH_PATH,\n",
228+
" namespaces_dict=default_namespaces(),\n",
229+
" all_classes_mode=True,\n",
230+
" input_format=TURTLE_ITER,\n",
231+
" instances_report_mode=RATIO_INSTANCES) # Default value, you could omit this in case you prefer ratio\n",
232+
"result = shaper.shex_graph(string_output=True)\n",
233+
"print(\"_______Relative stats________\")\n",
234+
"print(result)\n",
235+
"\n",
236+
"# Absolute\n",
237+
"shaper = Shaper(\n",
238+
" graph_file_input=INPUT_GRPAH_PATH,\n",
239+
" namespaces_dict=default_namespaces(),\n",
240+
" all_classes_mode=True,\n",
241+
" input_format=TURTLE_ITER,\n",
242+
" instances_report_mode=ABSOLUTE_INSTANCES) # For absolute values\n",
243+
"result = shaper.shex_graph(string_output=True)\n",
244+
"print(\"_______Absolute stats________\")\n",
245+
"print(result)\n",
246+
"\n",
247+
"# Both\n",
248+
"shaper = Shaper(\n",
249+
" graph_file_input=INPUT_GRPAH_PATH,\n",
250+
" namespaces_dict=default_namespaces(),\n",
251+
" all_classes_mode=True,\n",
252+
" input_format=TURTLE_ITER,\n",
253+
" instances_report_mode=MIXED_INSTANCES) # For both at a time\n",
254+
"result = shaper.shex_graph(string_output=True)\n",
255+
"print(\"_______Both stat types________\")\n",
256+
"print(result)"
257+
],
258+
"metadata": {
259+
"id": "p9Q8cY9O2DKS"
260+
},
261+
"execution_count": null,
262+
"outputs": []
263+
},
264+
{
265+
"cell_type": "code",
266+
"source": [
267+
"# Let's now generate UML visualizations instead of shapes\n",
268+
"\n",
269+
"from PIL import Image\n",
270+
"from IPython.display import display\n",
271+
"\n",
272+
"UML_PATH = 'uml_shapes.jpg'\n",
273+
"\n",
274+
"\n",
275+
"shaper = Shaper(\n",
276+
" graph_file_input=INPUT_GRPAH_PATH,\n",
277+
" namespaces_dict=default_namespaces(),\n",
278+
" all_classes_mode=True,\n",
279+
" input_format=TURTLE_ITER)\n",
280+
"shaper.shex_graph(to_uml_path=UML_PATH) # Just set a value for this parameter and the visualization will be generated\n",
281+
"\n",
282+
"image = Image.open(UML_PATH)\n",
283+
"display(image)"
284+
],
285+
"metadata": {
286+
"id": "pt-E6e0o-tDN"
287+
},
288+
"execution_count": null,
289+
"outputs": []
290+
},
291+
{
292+
"cell_type": "code",
293+
"source": [
294+
"# Let's generate now RDF-Config files. Read about RDF-Config at: https://github.com/dbcls/rdf-config\n",
295+
"# 3 RDF-Config files are generated. model, prefixes and endpoint. In case we\n",
296+
"# use RDF input instead of SPARQL endpoints (as in this case), endpoint.yaml won't be generated\n",
297+
"# You must tell sheXer the directory in which you want the files to be written\n",
298+
"# but you can also obtain the output via string.\n",
299+
"\n",
300+
"TARGET_DIR = \".\"\n",
301+
"\n",
302+
"shaper = Shaper(graph_file_input=INPUT_GRPAH_PATH,\n",
303+
" all_classes_mode=True,\n",
304+
" input_format=TURTLE_ITER,\n",
305+
" namespaces_dict=default_namespaces(),\n",
306+
" examples_mode=ALL_EXAMPLES, # When generating RDF-Config files, make sure to add this, as examples are a core part of the RDF-Config YAML files\n",
307+
" # and sheXer won't track them unless it is told to do so.\n",
308+
" instances_report_mode=MIXED_INSTANCES)\n",
309+
"\n",
310+
"\n",
311+
"result = shaper.shex_graph(rdfconfig_directory=TARGET_DIR, # Set this property to some value to generate rdf-config files\n",
312+
" string_output=True)\n",
313+
"\n",
314+
"print(result)\n",
315+
"\n"
316+
],
317+
"metadata": {
318+
"id": "XhlOqU3MAC39"
319+
},
320+
"execution_count": null,
321+
"outputs": []
322+
},
323+
{
324+
"cell_type": "markdown",
325+
"source": [
326+
"# Shapes for federated queries\n",
327+
"\n",
328+
"sheXer can also generate some shapes which are meant to be used to document\n",
329+
"structures in scenarios of potential federated queries.\n",
330+
"\n",
331+
"This feature requires instantiating an object with several parameters and\n",
332+
"it is explained in a standalone example. Check it at [this link](https://github.com/weso/shexer/blob/master/doc/Example_federated_shapes.ipynb)"
333+
],
334+
"metadata": {
335+
"id": "7NUljguyBy_J"
336+
}
337+
}
338+
]
339+
}

shexer/io/graph/yielder/big_ttl_triples_yielder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
_BOOLEANS = ["true", "false"]
1313
_INI_BASE_URIS = ["/", "#"]
1414
_CLOSURES = [",", ";", "."]
15+
_SPECIAL_CHARS_AFTER_QUOTES = ["^","@"]
1516
_S = 0
1617
_P = 1
1718
_O = 2
@@ -211,7 +212,7 @@ def _find_next_quoted_literal_ending(self, target_str, start_index):
211212
start_index=start_index+1)
212213
if next_quotes +1 > len(target_str) or target_str[next_quotes + 1] == " ":
213214
return next_quotes
214-
elif target_str[next_quotes + 1] == "^":
215+
elif target_str[next_quotes + 1] in _SPECIAL_CHARS_AFTER_QUOTES:
215216
return self._find_next_blank(target_str, next_quotes) - 1
216217
else:
217218
raise ValueError("Malformed literal? It seems like there is a problem of unmatching quotes: " + target_str)

shexer/io/rdfconfig/formater/rdfconfig_serializer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,10 @@ def _serialize_constraint(self, shape, constraint):
132132
elif example_cons.startswith("http://") or example_cons.startswith("https://"):
133133
example_cons = self._nice_uri(example_cons)
134134
elif not example_cons.isnumeric():
135-
example_cons = f'"{example_cons}"'
135+
if example_cons.startswith('"') and "@" in example_cons: # This must be a lang string
136+
example_cons = example_cons[:example_cons.find("@")]
137+
elif not example_cons.startswith('"'):
138+
example_cons = f'"{example_cons}"'
136139
self._write_shape_line(indentation=_PROPERTY_INDENT_LEVEL,
137140
content=f"{st_property}:")
138141
self._write_shape_line(indentation=_CONSTRAINT_INDENT_LEVEL,

0 commit comments

Comments
 (0)