Skip to content
This repository was archived by the owner on Sep 27, 2024. It is now read-only.

Commit e3ad37e

Browse files
committed
Initial commit
0 parents  commit e3ad37e

File tree

7 files changed

+286
-0
lines changed

7 files changed

+286
-0
lines changed

README.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# SPRL-Spacy
2+
3+
This repository implements an easy-to-use Spatial Role Labeling module trained on
4+
three entities (`TRAJECTOR`, `SPATIAL_INDICATOR`, `LANDMARK`) and the relations appearing
5+
in the SpRL 2013 IAPR TC-12 dataset.
6+
7+
## Requirements
8+
9+
- [git lfs](https://git-lfs.github.com/) for cloning the large files in this repository
10+
- `spacy >=2.0.0a18` and the necessary requirements
11+
- `sklearn`
12+
- `scipy`
13+
- `pickle` for python 3.7.0
14+
- `problog` for use with ProbLog.
15+
16+
## Usage
17+
18+
1. Clone this repository where you want to use it.
19+
2. Download the two models from the Releases page
20+
3. Import `spacy` and `sprl` and use them like the following example:
21+
22+
23+
```
24+
import spacy
25+
from sprl import *
26+
27+
nlp = spacy.load('en_core_web_lg-sprl')
28+
29+
sentence = "An angry big dog is behind us."
30+
31+
rel = sprl(sentence, nlp)
32+
```
33+
34+
If everything went fine you should get something like:
35+
36+
```
37+
[(An angry big dog, behind, us, 'direction')]
38+
```
39+
40+
If you happen to have ProbLog installed, see `example.pl` for how to use it from
41+
within problog.
42+
43+
## Credits
44+
45+
While the model has been trained by me, the relation extraction part uses features from
46+
the SpRL-CWW paper (see below), and the dataset comes from SemEval 2013 Task 3: Spatial Role Labeling.
47+
48+
The features for relation extraction:
49+
50+
```
51+
Nichols, Eric, and Fadi Botros.
52+
"SpRL-CWW: Spatial relation classification with independent multi-class models."
53+
Proceedings of the 9th International Workshop on Semantic Evaluation.
54+
```
55+
56+
The dataset, from SemEval-2013 Task 3 (Spatial Role Labeling):
57+
58+
```
59+
Kolomiyets, Oleksandr, et al.
60+
"Semeval-2013 task 3: Spatial role labeling."
61+
Second Joint Conference on Lexical and Computational Semantics
62+
```
63+
64+
So please cite the papers above, as well as spacy and ProbLog (if you use it) in your work :)
65+
66+

example.pl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
% Load the Python module that exports the sprl/5 and in_range/4 predicates.
:-use_module('sprl_pl.py').
2+
3+
% Ask for every relation found in the sentence: trajector (Tr), spatial
% indicator (Sp), landmark (Lm) and the predicted relation type (Type).
query(sprl('A book and a ball on the table', Tr, Sp, Lm, Type)).
4+
%query(in_range(1,10,X1, X2)).

sprl/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .sprl import *
167 Bytes
Binary file not shown.
4.75 KB
Binary file not shown.

sprl/sprl.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Mon Sep 3 17:14:47 2018
5+
6+
@author: Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
7+
8+
Functions that do spatial role labeling. Relation extraction is done
9+
using sklearn with features extracted from the sentence based on the following paper:
10+
cd
11+
Nichols, Eric, and Fadi Botros.
12+
"SpRL-CWW: Spatial relation classification with independent multi-class models."
13+
Proceedings of the 9th International Workshop on Semantic Evaluation (SemEval 2015)
14+
"""
15+
16+
from sklearn.externals import joblib
17+
import spacy
18+
19+
20+
def _chain_to_root(token):
    """Return ``[token, token.head, ...]``, ending at the top of the tree."""
    chain = [token]
    # Stop on the 'ROOT' label, and also on a token that is its own head, so
    # that a top token without the 'ROOT' label cannot loop forever.
    while token.dep_ != 'ROOT' and token.head != token:
        token = token.head
        chain.append(token)
    return chain


def get_dep_path(span1, span2):
    """Return the dependency-label path leading from span1 to span2.

    Each span is represented by its first token.  The path climbs from
    ``span1[0]`` through successive heads until it reaches a token that
    also lies on the head chain of ``span2[0]``, then descends that chain
    down to ``span2[0]``.

    Arguments:
        span1, span2: indexable token sequences exposing ``.sent``; their
            tokens must expose ``.dep_`` and ``.head``.  Both spans must
            belong to the same sentence.

    Returns:
        A list of strings: ``"<dep>::up"`` for every token strictly between
        ``span1[0]`` and the meeting token, followed by ``"<dep>::down"``
        for the meeting token and every token after it down to
        ``span2[0]``.  An empty list when the two chains share no token.

    Raises:
        AssertionError: if the spans come from different sentences.
    """
    assert span1.sent == span2.sent, "sent1: {}, span1: {}, sent2: {}, span2: {}".format(span1.sent, span1, span2.sent, span2)

    up = _chain_to_root(span1[0])
    down = _chain_to_root(span2[0])
    down.reverse()  # now ordered from the top of the tree down to span2[0]

    for n1, t1 in enumerate(up):
        for n2, t2 in enumerate(down):
            if t1 == t2:
                climb = ["{}::{}".format(u.dep_, 'up') for u in up[1:n1]]
                descend = ["{}::{}".format(d.dep_, 'down') for d in down[n2:]]
                return climb + descend

    # No shared token: report an empty path so callers can still iterate
    # over the result and take its length.
    return []
43+
44+
def extract_relation_features(relation):
    """Build the feature dict describing one candidate spatial relation.

    Arguments:
        relation: sequence whose items 0, 1 and 2 are the first argument,
            the trigger and the second argument.  The trigger is a sequence
            of tokens; either argument may be ``None``.  Tokens must expose
            ``.text``, ``.lemma_``, ``.pos_`` and ``.i``.

    Returns:
        A dict with, per trigger token ``n``: ``TF1T{n}`` text, ``TF2T{n}``
        lemma, ``TF3T{n}`` POS, ``TF4T{n}`` lemma::POS.  Per argument ``a``
        (0 or 1): ``A{a}F5T{n}``..``A{a}F8T{n}`` (same four values per
        token), ``A{a}F12`` side of the trigger (``'LEFT'``/``'RIGHT'``),
        ``A{a}F22`` token distance to the trigger, ``A{a}F17E{k}``
        dependency-path steps, ``A{a}F20`` path length, ``A{a}F24`` whether
        the argument is missing.  Joint features: ``F13``, ``14``,
        ``14T{n}`` and ``F23``.

    NOTE(review): the keys are deliberately left exactly as they were
    (including the un-prefixed ``'14'`` ones); the dict is handed to a
    vectorizer loaded from a saved model, which presumably matches on these
    exact names -- confirm before renaming any of them.
    """
    F = {}  # Feature dict

    trigger = relation[1]
    args = [relation[0], relation[2]]

    # Features describing the trigger tokens
    for n, token in enumerate(trigger):
        F['TF1T{}'.format(n)] = token.text
        F['TF2T{}'.format(n)] = token.lemma_
        F['TF3T{}'.format(n)] = token.pos_
        F['TF4T{}'.format(n)] = "::".join([token.lemma_, token.pos_])  # RF.2 concat RF.1

    # Features describing each of the two arguments
    for a, arg in enumerate(args):
        if arg is None:
            F['A{}F24'.format(a)] = True
            continue

        for n, token in enumerate(arg):
            F['A{}F5T{}'.format(a, n)] = token.text
            F['A{}F6T{}'.format(a, n)] = token.lemma_
            F['A{}F7T{}'.format(a, n)] = token.pos_
            F['A{}F8T{}'.format(a, n)] = "::".join([token.lemma_, token.pos_])

        # Side of the trigger and distance to it; neither feature is set
        # when the argument overlaps the trigger.
        if arg[-1].i < trigger[0].i:
            F['A{}F12'.format(a)] = 'LEFT'
            F['A{}F22'.format(a)] = trigger[0].i - arg[-1].i
        elif arg[0].i > trigger[-1].i:
            F['A{}F12'.format(a)] = 'RIGHT'
            F['A{}F22'.format(a)] = arg[0].i - trigger[-1].i

        # get_dep_path may yield None when it finds no common token; treat
        # that as an empty path instead of crashing in enumerate()/len().
        path = get_dep_path(arg, trigger) or []
        for step, label in enumerate(path):
            F['A{}F17E{}'.format(a, step)] = label
        F['A{}F20'.format(a)] = len(path)
        F['A{}F24'.format(a)] = False

    # Joint features
    if 'A0F12' in F and 'A1F12' in F:
        F['F13'] = "::".join([F['A0F12'], F['A1F12']])
        F['14'] = F['A0F12'] == F['A1F12']
        for n, token in enumerate(trigger):
            F['14T{}'.format(n)] = '::'.join([F['F13'], token.lemma_])

    if 'A0F22' in F and 'A1F22' in F:
        F['F23'] = F['A0F22'] + F['A1F22']

    return F
101+
102+
def extract_candidate_relations_from_sents(sents, gold_relations):
    """Enumerate candidate (trajector, trigger, landmark) triples.

    For every sentence, each combination of one ``SPATIAL_INDICATOR``, one
    ``TRAJECTOR`` and one ``LANDMARK`` entity is a candidate, unless that
    exact triple already appears in ``gold_relations``.

    Arguments:
        sents: iterable of objects exposing ``.ents``; every entity must
            expose ``.label_`` and ``.sent``.
        gold_relations: container of ``(trajector, trigger, landmark)``
            triples to leave out.

    Returns:
        ``(candidate_relations, candidate_labels)``: the list of triples
        and a parallel list holding the label ``'NONE'`` for each of them.

    Raises:
        AssertionError: if the entities of a triple do not share the same
            ``.sent``.
    """
    candidate_relations = []
    candidate_labels = []

    for sent in sents:
        # Bucket the entities by role in a single pass over sent.ents.
        by_label = {'SPATIAL_INDICATOR': [], 'TRAJECTOR': [], 'LANDMARK': []}
        for ent in sent.ents:
            if ent.label_ in by_label:
                by_label[ent.label_].append(ent)

        for trigger in by_label['SPATIAL_INDICATOR']:
            for trajector in by_label['TRAJECTOR']:
                for landmark in by_label['LANDMARK']:
                    assert trajector.sent == trigger.sent == landmark.sent, "{}: {}".format(sent, sent.ents)
                    crel = (trajector, trigger, landmark)
                    if crel not in gold_relations:
                        candidate_relations.append(crel)
                        candidate_labels.append('NONE')

    return candidate_relations, candidate_labels
127+
128+
def sprl(sentence,
         nlp,
         model_relext_filename='model_svm_relations.pkl'):
    """Extract the spatial relations expressed in ``sentence``.

    Arguments:
        sentence: text to analyse.
        nlp: callable turning text into a document that exposes ``.sents``;
            it is applied once to the whole text and once more to the text
            of every sentence found.
        model_relext_filename: path of the file holding the
            ``(classifier, vectorizer)`` pair loaded with ``joblib``.

    Returns:
        A list of ``(trajector, trigger, landmark, general_type)`` tuples,
        one per candidate relation whose predicted type is not ``'NONE'``.
    """
    doc = nlp(sentence)
    sents = [nlp(s.text) for s in doc.sents]
    candidate_relations, _ = extract_candidate_relations_from_sents(sents, [])
    if not candidate_relations:
        # Nothing to classify, so do not load the model file at all.
        return []

    # NOTE(security): joblib.load unpickles the file; only point this at
    # model files you trust.
    clf, dv = joblib.load(model_relext_filename)

    output = []
    for relation in candidate_relations:
        F = extract_relation_features(relation)
        feat_vec = dv.transform(F)
        general_type = clf.predict(feat_vec)[0]
        if general_type != 'NONE':
            output.append((relation[0], relation[1], relation[2], general_type))

    return output
144+
145+
146+
def sprl_str(sentence,
             nlp,
             model_relext_filename='model_svm_relations.pkl'):
    """Same as ``sprl`` but with the three spans converted to strings.

    Arguments are forwarded unchanged to ``sprl``.

    Returns:
        A list of ``(trajector, trigger, landmark, general_type)`` tuples
        in which the first three items have been passed through ``str``;
        ``general_type`` is kept as returned by ``sprl``.
    """
    # Delegate to sprl() rather than duplicating its pipeline.
    return [(str(trajector), str(trigger), str(landmark), general_type)
            for trajector, trigger, landmark, general_type
            in sprl(sentence, nlp, model_relext_filename)]

sprl_pl.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Thu Sep 20 13:51:33 2018
5+
6+
@author: Emmanouil Theofanis Chourdakis
7+
8+
Problog bindings for spacy-sprl
9+
10+
"""
11+
12+
from problog.extern import problog_export, problog_export_nondet, problog_export_raw
13+
from problog.logic import Constant as c_
14+
15+
import spacy
16+
import sprl as mod_sprl
17+
18+
# Memoization table filled by the sprl predicate: cleaned sentence -> list of
# relation tuples whose items are all strings.
lut_sprl = {}

# NLP model; stays None until the first sprl query loads it.
nlp_sprl = None
23+
24+
@problog_export_nondet('+str', '-str', '-str', '-str', '-str')
def sprl(sentence):
    """Return the spatial relations of ``sentence`` as tuples of strings.

    Exported with one string input and four string outputs.  The NLP model
    is loaded on first use and results are memoized per sentence in
    ``lut_sprl``.

    Arguments:
        sentence: the text to analyse; one leading and one trailing single
            quote, if present, are stripped before processing.

    Returns:
        A list of 4-tuples of strings, one per relation returned by
        ``mod_sprl.sprl``.
    """
    global nlp_sprl, lut_sprl

    # Load NLP model if it is not loaded already
    if not nlp_sprl:
        print("[II] Loading NLP model...")
        nlp_sprl = spacy.load('en_core_web_lg-sprl')
        print("[II] done.")

    # Remove the surrounding "'"s.  startswith/endswith are used instead of
    # indexing so that '' and "'" do not raise IndexError.
    sent = sentence
    if sent.startswith("'"):
        sent = sent[1:]
    if sent.endswith("'"):
        sent = sent[:-1]

    # Probe the cache with the same (cleaned) key it is stored under; the
    # raw sentence was probed before, so quoted input never hit the cache.
    if sent not in lut_sprl:
        triples = mod_sprl.sprl(sent, nlp_sprl)
        lut_sprl[sent] = [tuple(str(t) for t in l) for l in triples]
    return lut_sprl[sent]
48+
49+
50+
@problog_export_nondet('+int', '+int', '-int', '-int')
def in_range(a, b):
    """Return ``(aa + bb, aa * bb)`` for every pair drawn from ``range(a, b)``.

    Exported with two integer inputs and two integer outputs.  Pairs are
    produced with ``aa`` as the outer loop and ``bb`` as the inner loop;
    the list is empty when ``a >= b``.
    """
    return [(aa + bb, aa * bb) for aa in range(a, b) for bb in range(a, b)]

0 commit comments

Comments
 (0)