1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "provenance" : []
7+ },
8+ "kernelspec" : {
9+ "name" : " python3" ,
10+ "display_name" : " Python 3"
11+ },
12+ "language_info" : {
13+ "name" : " python"
14+ }
15+ },
16+ "cells" : [
17+ {
18+ "cell_type" : " code" ,
19+ "execution_count" : null ,
20+ "metadata" : {
21+ "id" : " 0HH1fi9Iuvmz"
22+ },
23+ "outputs" : [],
24+ "source" : [
25+ " !pip install shexer"
26+ ]
27+ },
28+ {
29+ "cell_type" : " markdown" ,
30+ "source" : [
31+ " sheXer can generate different types of outputs:\n " ,
32+ " * ShEx (in compact syntax).\n " ,
33+ " * SHACL (in Turtle).\n " ,
34+ " * Statistics (as annotations in ShEx).\n " ,
35+ " * Examples of conformance (as annotations in ShEx).\n " ,
36+ " * UML visualizations.\n " ,
37+ " * RDF-Config files.\n " ,
38+ " * Shapes for federation.\n " ,
39+ " \n " ,
40+ " In this notebook, we provide some examples on how to produce them all."
41+ ],
42+ "metadata" : {
43+ "id" : " pEaY9geKvBW9"
44+ }
45+ },
46+ {
47+ "cell_type" : " code" ,
48+ "source" : [
49+ " from shexer.shaper import Shaper\n " ,
50+ " from shexer.consts import TURTLE_ITER, SHACL_TURTLE, SHEXC, SHAPE_EXAMPLES, CONSTRAINT_EXAMPLES, ALL_EXAMPLES, RATIO_INSTANCES, ABSOLUTE_INSTANCES, MIXED_INSTANCES\n " ,
51+ " import requests\n " ,
52+ " \n " ,
53+ " def remote_to_local(url, local_path):\n " ,
54+ " response = requests.get(url)\n " ,
55+ " if response.status_code == 200:\n " ,
56+ " with open(local_path, \" w\" , encoding=\" utf-8\" ) as out_stream:\n " ,
57+ " out_stream.write(response.text)\n " ,
58+ " \n " ,
59+ " INPUT_GRPAH_PATH = \" local_file.ttl\"\n " ,
60+ " def default_namespaces():\n " ,
61+ " return {\" http://example.org/\" : \" ex\" ,\n " ,
62+ " \" http://www.w3.org/XML/1998/namespace/\" : \" xml\" ,\n " ,
63+ " \" http://www.w3.org/1999/02/22-rdf-syntax-ns#\" : \" rdf\" ,\n " ,
64+ " \" http://www.w3.org/2000/01/rdf-schema#\" : \" rdfs\" ,\n " ,
65+ " \" http://www.w3.org/2001/XMLSchema#\" : \" xsd\" ,\n " ,
66+ " \" http://xmlns.com/foaf/0.1/\" : \" foaf\"\n " ,
67+ " }\n " ,
68+ " \n " ,
69+ " remote_to_local(\" https://raw.githubusercontent.com/weso/shexer/refs/heads/master/test/t_files/t_graph_1.ttl\" ,\n " ,
70+ " INPUT_GRPAH_PATH)\n " ,
71+ " \n " ,
72+ " print(\" # We will work with this graph:\\ n\" )\n " ,
73+ " \n " ,
74+ " with open(INPUT_GRPAH_PATH) as out_stream:\n " ,
75+ " print(out_stream.read())"
76+ ],
77+ "metadata" : {
78+ "id" : " 8E-z512uvA1N"
79+ },
80+ "execution_count" : null ,
81+ "outputs" : []
82+ },
83+ {
84+ "cell_type" : " code" ,
85+ "source" : [
86+ " # Generation of SHACL to file\n " ,
87+ " \n " ,
88+ " shaper = Shaper(\n " ,
89+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
90+ " namespaces_dict=default_namespaces(),\n " ,
91+ " all_classes_mode=True,\n " ,
92+ " input_format=TURTLE_ITER)\n " ,
93+ " shaper.shex_graph(output_file=\" shacl_shapes.ttl\" , # Provide a disk path to save the results with this parameter\n " ,
94+ " output_format=SHACL_TURTLE) # Set this param with this value to generate SHACL shapes\n " ,
95+ " \n " ,
96+ " with open(\" shacl_shapes.ttl\" ) as in_stream:\n " ,
97+ " print(in_stream.read()) # Just checking the content of the file generated\n " ,
98+ " \n "
99+ ],
100+ "metadata" : {
101+ "id" : " nrPGJ8Gvvy9V"
102+ },
103+ "execution_count" : null ,
104+ "outputs" : []
105+ },
106+ {
107+ "cell_type" : " code" ,
108+ "source" : [
109+ " # Same thing, but without saving to a file. Instead, the result is returned as a string\n " ,
110+ " \n " ,
111+ " shaper = Shaper(\n " ,
112+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
113+ " namespaces_dict=default_namespaces(),\n " ,
114+ " all_classes_mode=True,\n " ,
115+ " input_format=TURTLE_ITER)\n " ,
116+ " result = shaper.shex_graph(string_output=True, # If you set this to True, the method will return a string\n " ,
117+ " output_format=SHACL_TURTLE) # You can indicate a file anyway. If you do, the results will be saved to disk regardless of whether they were returned in a string\n " ,
118+ " \n " ,
119+ " print(result)\n " ,
120+ " print(\" ---------------------------\" )\n " ,
121+ " with open(\" shacl_shapes.ttl\" ) as in_stream:\n " ,
122+ " print(in_stream.read()) # Just checking the content of the file generated by the previous cell"
123+ ],
124+ "metadata" : {
125+ "id" : " 54C7obbkx3jT"
126+ },
127+ "execution_count" : null ,
128+ "outputs" : []
129+ },
130+ {
131+ "cell_type" : " code" ,
132+ "source" : [
133+ " # Same input graph, but now we generate results in ShEx\n " ,
134+ " shaper = Shaper(\n " ,
135+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
136+ " namespaces_dict=default_namespaces(),\n " ,
137+ " all_classes_mode=True,\n " ,
138+ " input_format=TURTLE_ITER)\n " ,
139+ " result = shaper.shex_graph(string_output=True,\n " ,
140+ " output_format=SHEXC) # SHEXC is the default value. If you don't set this parameter, results will be generated in ShEx.\n " ,
141+ " print(result)\n "
142+ ],
143+ "metadata" : {
144+ "id" : " DG-XesT6yg9j"
145+ },
146+ "execution_count" : null ,
147+ "outputs" : []
148+ },
149+ {
150+ "cell_type" : " code" ,
151+ "source" : [
152+ " # Note that ShEx outputs are commented with statistical information.\n " ,
153+ " # We can disable such information and just get shapes\n " ,
154+ " \n " ,
155+ " \n " ,
156+ " shaper = Shaper(\n " ,
157+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
158+ " namespaces_dict=default_namespaces(),\n " ,
159+ " all_classes_mode=True,\n " ,
160+ " disable_comments=True, # Use this to avoid getting comments on shapes.\n " ,
161+ " input_format=TURTLE_ITER)\n " ,
162+ " result = shaper.shex_graph(string_output=True)\n " ,
163+ " print(result)\n "
164+ ],
165+ "metadata" : {
166+ "id" : " 35kIg1dMy-WU"
167+ },
168+ "execution_count" : null ,
169+ "outputs" : []
170+ },
171+ {
172+ "cell_type" : " code" ,
173+ "source" : [
174+ " # We could also make comments even richer. Shapes can be annotated with examples\n " ,
175+ " # at constraint level (examples of nodes matching node constraints)\n " ,
176+ " # or at shape level (examples of nodes matching shapes).\n " ,
177+ " \n " ,
178+ " # You can get examples at shape level only\n " ,
179+ " shaper = Shaper(\n " ,
180+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
181+ " namespaces_dict=default_namespaces(),\n " ,
182+ " all_classes_mode=True,\n " ,
183+ " disable_comments=False, # Default value, you could just omit this when it is False\n " ,
184+ " input_format=TURTLE_ITER,\n " ,
185+ " examples_mode=SHAPE_EXAMPLES) # Set this parameter to one of the allowed values to get shapes annotated with examples\n " ,
186+ " result = shaper.shex_graph(string_output=True)\n " ,
187+ " print(\" _______examples at shape level________\" )\n " ,
188+ " print(result)\n " ,
189+ " \n " ,
190+ " # ... at constraint level only\n " ,
191+ " shaper = Shaper(\n " ,
192+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
193+ " namespaces_dict=default_namespaces(),\n " ,
194+ " all_classes_mode=True,\n " ,
195+ " disable_comments=False, # Default value, you could just omit this when it is False\n " ,
196+ " input_format=TURTLE_ITER,\n " ,
197+ " examples_mode=CONSTRAINT_EXAMPLES) # Set this parameter to one of the allowed values to get shapes annotated with examples\n " ,
198+ " result = shaper.shex_graph(string_output=True)\n " ,
199+ " print(\" _______examples at constraint level________\" )\n " ,
200+ " print(result)\n " ,
201+ " \n " ,
202+ " # ... or both things at a time.\n " ,
203+ " shaper = Shaper(\n " ,
204+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
205+ " namespaces_dict=default_namespaces(),\n " ,
206+ " all_classes_mode=True,\n " ,
207+ " disable_comments=False, # Default value, you could just omit this when it is False\n " ,
208+ " input_format=TURTLE_ITER,\n " ,
209+ " examples_mode=ALL_EXAMPLES) # Set this parameter to one of the allowed values to get shapes annotated with examples\n " ,
210+ " result = shaper.shex_graph(string_output=True)\n " ,
211+ " print(\" _______examples at ALL level________\" )\n " ,
212+ " print(result)"
213+ ],
214+ "metadata" : {
215+ "id" : " gxVueGRWzRV6"
216+ },
217+ "execution_count" : null ,
218+ "outputs" : []
219+ },
220+ {
221+ "cell_type" : " code" ,
222+ "source" : [
223+ " # You can also tune if you want absolute, relative or both types for the generated stats\n " ,
224+ " \n " ,
225+ " # Relative\n " ,
226+ " shaper = Shaper(\n " ,
227+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
228+ " namespaces_dict=default_namespaces(),\n " ,
229+ " all_classes_mode=True,\n " ,
230+ " input_format=TURTLE_ITER,\n " ,
231+ " instances_report_mode=RATIO_INSTANCES) # Default value, you could omit this in case you prefer ratio\n " ,
232+ " result = shaper.shex_graph(string_output=True)\n " ,
233+ " print(\" _______Relative stats________\" )\n " ,
234+ " print(result)\n " ,
235+ " \n " ,
236+ " # Absolute\n " ,
237+ " shaper = Shaper(\n " ,
238+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
239+ " namespaces_dict=default_namespaces(),\n " ,
240+ " all_classes_mode=True,\n " ,
241+ " input_format=TURTLE_ITER,\n " ,
242+ " instances_report_mode=ABSOLUTE_INSTANCES) # For absolute values\n " ,
243+ " result = shaper.shex_graph(string_output=True)\n " ,
244+ " print(\" _______Absolute stats________\" )\n " ,
245+ " print(result)\n " ,
246+ " \n " ,
247+ " # Both\n " ,
248+ " shaper = Shaper(\n " ,
249+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
250+ " namespaces_dict=default_namespaces(),\n " ,
251+ " all_classes_mode=True,\n " ,
252+ " input_format=TURTLE_ITER,\n " ,
253+ " instances_report_mode=MIXED_INSTANCES) # For both at a time\n " ,
254+ " result = shaper.shex_graph(string_output=True)\n " ,
255+ " print(\" _______Both stat types________\" )\n " ,
256+ " print(result)"
257+ ],
258+ "metadata" : {
259+ "id" : " p9Q8cY9O2DKS"
260+ },
261+ "execution_count" : null ,
262+ "outputs" : []
263+ },
264+ {
265+ "cell_type" : " code" ,
266+ "source" : [
267+ " # Let's now generate UML visualizations instead of shapes\n " ,
268+ " \n " ,
269+ " from PIL import Image\n " ,
270+ " from IPython.display import display\n " ,
271+ " \n " ,
272+ " UML_PATH = 'uml_shapes.jpg'\n " ,
273+ " \n " ,
274+ " \n " ,
275+ " shaper = Shaper(\n " ,
276+ " graph_file_input=INPUT_GRPAH_PATH,\n " ,
277+ " namespaces_dict=default_namespaces(),\n " ,
278+ " all_classes_mode=True,\n " ,
279+ " input_format=TURTLE_ITER)\n " ,
280+ " shaper.shex_graph(to_uml_path=UML_PATH) # Just set a value for this parameter and the visualization will be generated\n " ,
281+ " \n " ,
282+ " image = Image.open(UML_PATH)\n " ,
283+ " display(image)"
284+ ],
285+ "metadata" : {
286+ "id" : " pt-E6e0o-tDN"
287+ },
288+ "execution_count" : null ,
289+ "outputs" : []
290+ },
291+ {
292+ "cell_type" : " code" ,
293+ "source" : [
294+ " # Let's now generate RDF-Config files. Read about RDF-Config at: https://github.com/dbcls/rdf-config\n " ,
295+ " # Three RDF-Config files are generated: model, prefixes and endpoint. In case we\n " ,
296+ " # use RDF input instead of SPARQL endpoints (as in this case), endpoint.yaml won't be generated.\n " ,
297+ " # You must tell sheXer the directory in which you want the files to be written,\n " ,
298+ " # but you can also obtain the output via string.\n " ,
299+ " \n " ,
300+ " TARGET_DIR = \" .\"\n " ,
301+ " \n " ,
302+ " shaper = Shaper(graph_file_input=INPUT_GRPAH_PATH,\n " ,
303+ " all_classes_mode=True,\n " ,
304+ " input_format=TURTLE_ITER,\n " ,
305+ " namespaces_dict=default_namespaces(),\n " ,
306+ " examples_mode=ALL_EXAMPLES, # When generating RDF-Config files, make sure to add this, as examples are a core part of the RDF-Config YAML files\n " ,
307+ " # and sheXer won't track them unless it is told to do so.\n " ,
308+ " instances_report_mode=MIXED_INSTANCES)\n " ,
309+ " \n " ,
310+ " \n " ,
311+ " result = shaper.shex_graph(rdfconfig_directory=TARGET_DIR, # Set this property to some value to generate rdf-config files\n " ,
312+ " string_output=True)\n " ,
313+ " \n " ,
314+ " print(result)\n " ,
315+ " \n "
316+ ],
317+ "metadata" : {
318+ "id" : " XhlOqU3MAC39"
319+ },
320+ "execution_count" : null ,
321+ "outputs" : []
322+ },
323+ {
324+ "cell_type" : " markdown" ,
325+ "source" : [
326+ " # Shapes for federated queries\n " ,
327+ " \n " ,
328+ " sheXer can also generate some shapes which are meant to be used to document\n " ,
329+ " structures in scenarios of potential federated queries.\n " ,
330+ " \n " ,
331+ " This feature requires instantiating an object with several parameters, and\n " ,
332+ " it is explained in a standalone example. Check it at [this link](https://github.com/weso/shexer/blob/master/doc/Example_federated_shapes.ipynb)"
333+ ],
334+ "metadata" : {
335+ "id" : " 7NUljguyBy_J"
336+ }
337+ }
338+ ]
339+ }
0 commit comments