-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessing.py
More file actions
44 lines (34 loc) · 1.4 KB
/
preprocessing.py
File metadata and controls
44 lines (34 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
from tqdm import tqdm
from SPARQLWrapper import SPARQLWrapper, CSV
import urllib.parse
# Folder holding the query inputs (prefix declarations, property list, .rq
# files) and folder receiving one CSV dump per property.
config_folder = './config'
data_folder = './data'

# SPARQL PREFIX declarations, one per line, prepended to every generated query.
# Blank lines are dropped; the encoding is explicit so the result does not
# depend on the platform's locale default.
with open(os.path.join(config_folder, 'prefixes.txt'), 'r', encoding='utf-8') as file:
    prefixes = [line for line in map(str.strip, file) if line]

# Properties to dump, one per line. Blank lines (e.g. a trailing newline at the
# end of the file) must be skipped: an empty entry would otherwise be spliced
# into the query as the predicate, yielding a malformed query and an output
# file named just ".csv".
with open(os.path.join(config_folder, 'object_properties.txt'), 'r', encoding='utf-8') as file:
    props = [line for line in map(str.strip, file) if line]
# Make sure the output folder for the per-property dumps exists before any
# query result is written into it.
os.makedirs(data_folder, exist_ok=True)

# Single client reused for every query in this script; results are requested
# in the CSV return format.
sparql = SPARQLWrapper("http://data.odeuropa.eu/repositories/odeuropa")
sparql.setReturnFormat(CSV)
# For each configured property, fetch every (subject, object) pair that uses
# it as predicate and store the converted response in
# <data_folder>/<property>.csv.
prefix_header = '\n'.join(prefixes)  # loop-invariant, built once
for p in tqdm(props):
    # The excluded graph is written as an IRI (<...>), not a string literal:
    # ?g is bound by GRAPH and is therefore always an IRI, and an IRI is never
    # equal to a literal, so comparing against "..." made the filter a no-op.
    # Only the literal containing %s is formatted, so '%' characters in the
    # prefix declarations are left untouched.
    q = prefix_header + '\n' + (
        'SELECT ?s ?o FROM <http://www.ontotext.com/explicit> '
        'WHERE { GRAPH ?g { ?s %s ?o } '
        'FILTER (?g != <http://data.odeuropa.eu/image-annotation>) }' % p
    )
    sparql.setQuery(q)
    ret = sparql.queryAndConvert()

    # Build a filesystem-safe file name: ':' becomes '_' and every other
    # reserved character (including '/') is percent-encoded.
    out_name = urllib.parse.quote(p.replace(':', '_'), safe='') + '.csv'
    with open(os.path.join(data_folder, out_name), 'wb') as file:
        file.write(ret)
# Vocabulary export: run the query stored in get_voc.rq and write the
# converted response, unchanged, to voc.txt in the current directory.
voc_query_path = os.path.join(config_folder, 'get_voc.rq')
with open(voc_query_path, 'r') as file:
    query = file.read()

sparql.setQuery(query)
# The query is executed before the output file is opened, so a failed request
# does not leave behind an empty voc.txt.
ret = sparql.queryAndConvert()

voc_out_path = os.path.join('./', 'voc.txt')
with open(voc_out_path, 'wb') as file:
    file.write(ret)
# Smells export: run the query stored in get_smells.rq and write the
# converted response, unchanged, to smells.txt in the current directory.
smells_query_path = os.path.join(config_folder, 'get_smells.rq')
with open(smells_query_path, 'r') as file:
    query = file.read()

sparql.setQuery(query)
# The query is executed before the output file is opened, so a failed request
# does not leave behind an empty smells.txt.
ret = sparql.queryAndConvert()

smells_out_path = os.path.join('./', 'smells.txt')
with open(smells_out_path, 'wb') as file:
    file.write(ret)