-
Notifications
You must be signed in to change notification settings - Fork 47
Expand file tree
/
Copy pathutils.py
More file actions
148 lines (124 loc) · 4.41 KB
/
utils.py
File metadata and controls
148 lines (124 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import csv
import hashlib
import os
import urllib
import logging
logger = logging.getLogger(__name__)
import requests
from rdflib import Graph, Namespace
from rdflib.namespace import NamespaceManager, ClosedNamespace
# Data namespace: read from the DATA_NAMESPACE environment variable, falling
# back to a placeholder default when it is unset or empty.
DATA_NAMESPACE = os.environ.get('DATA_NAMESPACE') or 'http://vivo.school.edu'

VIVO = Namespace('http://vivoweb.org/ontology/core#')
#FOAF = Namespace('http://xmlns.com/foaf/0.1/')
BIBO = Namespace('http://purl.org/ontology/bibo/')
OBO = Namespace('http://purl.obolibrary.org/obo/')
#SCHEMA = Namespace('http://schema.org/')
#SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')

# Collect every namespace object defined in this module, keyed by its
# variable name. Iterate over a snapshot of the module globals: the loop
# variables are themselves module globals, so looping over the live mapping
# raises "dictionary changed size during iteration" on Python 3.
namespaces = {}
for k, o in list(vars().items()):
    if isinstance(o, (Namespace, ClosedNamespace)):
        namespaces[k] = o

# Namespace manager with each namespace bound to the lower-cased variable
# name as its prefix (e.g. VIVO -> "vivo").
ns_mgr = NamespaceManager(Graph())
for k, v in namespaces.items():
    ns_mgr.bind(k.lower(), v)

# Use when a named graph isn't specified for SPARQL update.
DEFAULT_GRAPH = 'http://vitro.mannlib.cornell.edu/default/vitro-kb-2'
def _env(name):
    """
    Return the value of the environment variable `name`.

    Raises an Exception naming the variable when it is not set. A variable
    that is set to an empty string is returned as-is.
    """
    if name not in os.environ:
        message = "Can't find {}. Set environment variable.".format(name)
        raise Exception(message)
    return os.environ[name]
class VUpdate(object):
    """
    VIVO SPARQL Update class.

    Builds ``INSERT DATA`` / ``DELETE DATA`` requests from an iterable of
    triples and posts them to the VIVO SPARQL update API. The endpoint and
    credentials are read from the VIVO_UPDATE_ENDPOINT, VIVO_EMAIL and
    VIVO_PASSWORD environment variables when the instance is created.
    """

    def __init__(self):
        self.endpoint = _env('VIVO_UPDATE_ENDPOINT')
        self.email = _env('VIVO_EMAIL')
        self.password = _env('VIVO_PASSWORD')

    @staticmethod
    def _serialize(graph):
        """
        Return the triples in `graph` as N3 statements, one per line.

        `graph` is any iterable of (subject, predicate, object) triples whose
        members provide an ``n3()`` method.
        """
        return "".join(
            "%s %s %s .\n" % (subject.n3(), predicate.n3(), obj.n3())
            for subject, predicate, obj in graph
        )

    def add(self, graph, name=None):
        """
        Insert the triples in `graph` into the named graph `name`.

        Falls back to DEFAULT_GRAPH when `name` is not given.

        See:
        https://github.com/RDFLib/rdflib/blob/master/rdflib/plugins/stores/sparqlstore.py#L451
        """
        nameg = name or DEFAULT_GRAPH
        data = self._serialize(graph)
        sparql = "INSERT DATA \n { GRAPH <%s> {\n %s }\n}" % (nameg, data)
        self.do_update(sparql)

    def remove(self, graph, name=None):
        """
        Delete the triples in `graph` from the named graph `name`.

        Falls back to DEFAULT_GRAPH when `name` is not given.
        """
        nameg = name or DEFAULT_GRAPH
        data = self._serialize(graph)
        sparql = "DELETE DATA \n { GRAPH <%s> { %s }\n}" % (nameg, data)
        self.do_update(sparql)

    def do_update(self, query):
        """
        POST the SPARQL update `query` to the configured endpoint.

        Returns True on success. Raises an Exception when the response status
        is not 200 or when the final response URL does not look like the VIVO
        SPARQL update API.
        """
        logger.debug('Update query:\n %s', query)
        payload = {
            'email': self.email,
            'password': self.password,
            'update': query
        }
        # requests form-encodes the payload itself, which works on Python 2
        # and 3 and copes with non-ASCII characters in the query.
        response = requests.post(self.endpoint, data=payload)
        # This will raise an exception if something goes wrong.
        if response.status_code != 200:
            raise Exception("SPARQL update failed. Status code: {}".format(str(response.status_code)))
        # Verify that we actually hit the API endpoint. This is hardcoded.
        # Should read from properties or something.
        if 'api/sparqlUpdate' not in response.url:
            raise Exception("Response URL doesn't seem to be the VIVO API URL. Verify settings.")
        logger.info("Update response code: %s", response.status_code)
        return True
def hash_uri(raw, prefix='n'):
    """
    Return the MD5 hex digest of `raw`, prefixed with `prefix`.

    `raw` may be text or bytes; text is encoded as UTF-8 before hashing so
    that non-ASCII input works and the same value hashes identically on
    Python 2 and Python 3.
    """
    if not isinstance(raw, bytes):
        # hashlib only accepts bytes (and on Python 2 would otherwise try an
        # implicit ASCII encode, failing on non-ASCII text).
        raw = raw.encode('utf-8')
    hobj = hashlib.md5(raw)
    return prefix + hobj.hexdigest()
def scrub_row(row):
    """
    Return a cleaned copy of a row dictionary.

    For every value:
    - carriage returns and line breaks are removed;
    - values that are empty after that - "" - become Python None;
    - missing values (None, as csv.DictReader produces for short rows)
      stay None;
    - byte strings are decoded as utf-8, ignoring undecodable bytes;
      values that are already text are kept as text.
    """
    out_dictionary = {}
    for k, v in row.items():
        if v is None:
            out_dictionary[k] = None
            continue
        # Remove line breaks and carriage returns, using separators of the
        # same type as the value so both bytes and text are supported.
        if isinstance(v, bytes):
            v = v.replace(b'\n', b'').replace(b'\r', b'')
        else:
            v = v.replace('\n', '').replace('\r', '')
        if not v:
            out_dictionary[k] = None
        elif isinstance(v, bytes):
            out_dictionary[k] = v.decode('utf-8', 'ignore')
        else:
            out_dictionary[k] = v
    return out_dictionary
def read_file(file_name, delimiter=','):
    """
    Load a delimited file and return its rows as a list of cleaned
    dictionaries.

    Each row is parsed with csv.DictReader using `delimiter` and passed
    through scrub_row before being added to the result.
    """
    with open(file_name) as handle:
        reader = csv.DictReader(handle, delimiter=delimiter)
        return [scrub_row(record) for record in reader]
class CrossRefSearchException(Exception):
    """Raised when a CrossRef metadata search returns no results."""
def crossref_metadata_search(search_string):
    """
    Search the CrossRef metadata search API.

    `search_string` is the raw (not URL-encoded) query text. Returns the
    decoded JSON response. Raises CrossRefSearchException when the response
    is empty.
    """
    # Let requests build the query string so characters such as "&", "#"
    # and "+" in the search text are escaped instead of corrupting the URL.
    resp = requests.get(
        "http://search.crossref.org/dois",
        params={"q": search_string},
    )
    data = resp.json()
    if not data:
        raise CrossRefSearchException("No CR metadata search results")
    return data