Skip to content

Commit 3ed286f

Browse files
authored
Merge pull request #124 from percyliang/geo880
adding a geo880 module as a playground
2 parents 53dacb5 + a7a4c6b commit 3ed286f

File tree

11 files changed

+324
-7
lines changed

11 files changed

+324
-7
lines changed

build.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@
8888
<jar destfile="${libsempre}/sempre-overnight.jar" basedir="${classes}/overnight"/>
8989
</target>
9090

91+
<!-- Compile geo880 -->
92+
<target name="geo880" depends="init,core,corenlp,tables">
93+
<echo message="Compiling ${ant.project.name}: geo880"/>
94+
<mkdir dir="${classes}/geo880"/>
95+
<javac srcdir="${src}" destdir="${classes}/geo880" classpathref="lib.path" debug="true" includeantruntime="false" source="${source}" target="${target}">
96+
<include name="edu/stanford/nlp/sempre/geo880/"/>
97+
</javac>
98+
<jar destfile="${libsempre}/sempre-geo880.jar" basedir="${classes}/geo880"/>
99+
</target>
91100

92101
<!-- Clean up -->
93102
<target name="clean">

pull-dependencies

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,15 @@ addModule('esslli_2016', 'Data for ESSLLI 2016 semantic parsing class', lambda {
200200
pull('/u/nlp/data/semparse/esslli_2016', 'data/esslli_2016/', {:symlink => true})
201201
})
202202

203+
addModule('geo880', 'Data, lexicon, grammars and KB for geo880', lambda {
204+
pull('/u/nlp/data/semparse/geo880/geo880-test.examples', 'data/geo880', {:symlink => true})
205+
pull('/u/nlp/data/semparse/geo880/geo880-test.preprocessed.examples', 'data/geo880', {:symlink => true})
206+
pull('/u/nlp/data/semparse/geo880/geo880-train.preprocessed.examples', 'data/geo880', {:symlink => true})
207+
pull('/u/nlp/data/semparse/geo880/geo880.grammar', 'data/geo880', {:symlink => true})
208+
pull('/u/nlp/data/semparse/geo880/geo880.lexicon', 'data/geo880', {:symlink => true})
209+
pull('/u/nlp/data/semparse/geo880/geo880.kg', 'data/geo880', {:symlink => true})
210+
pull('/u/nlp/data/semparse/geo880/geo880.type_hierarchy', 'data/geo880', {:symlink => true})
211+
})
203212
############################################################
204213

205214
if ARGV.size == 0

run

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,72 @@ addMode('genovernight-wrapper', 'Generate utterances for overnight semantic pars
918918
lambda { |e| system 'mkdir -p genovernight.out'; o('execDir', 'genovernight.out/' + e[:domain]) },
919919
nil) })
920920

921+
addMode('geo880', 'Semantic parsing on the geo880 dataset', lambda { |e| l(
922+
# Usual header
923+
header('core,tables,corenlp,geo880'),
924+
'edu.stanford.nlp.sempre.Main',
925+
# Fig parameters
926+
figOpts,
927+
o('executor', 'tables.lambdadcs.LambdaDCSExecutor'),
928+
o('JoinFn.specializedTypeCheck', false), o('JoinFn.typeInference', false),
929+
# Parser
930+
o('Builder.parser', 'BeamParser'),
931+
o('Parser.coarsePrune'),
932+
933+
# Evaluation
934+
o('Builder.valueEvaluator', 'geo880.Geo880ValueEvaluator'),
935+
936+
# Grammar
937+
o('Grammar.inPaths','lib/data/geo880/geo880.grammar'),
938+
939+
# Type hierarchy
940+
o('Geo880TypeLookup.typeHierarchyPath', 'lib/data/geo880/geo880.type_hierarchy'),
941+
o('TypeInference.typeLookup','geo880.Geo880TypeLookup'),
942+
943+
# Lexicon
944+
o('SimpleLexicon.inPaths', 'lib/data/geo880/geo880.lexicon'),
945+
946+
# Learner
947+
o('Learner.maxTrainIters', 3),
948+
949+
# Dataset
950+
letDefault(:data, 0),
951+
sel(:data,
952+
l(o('Dataset.inPaths', 'train,lib/data/geo880/geo880-train.preprocessed.examples'), unbalancedTrainDevSplit), # (0) train 0.8, dev 0.2
953+
l(o('Dataset.inPaths', 'train,lib/data/geo880/geo880-train.examples', 'test,lib/data/geo880/geo880-test.preprocessed/examples')), # (1) Don't run on test yet!
954+
nil),
955+
# Load the graph
956+
o('Dataset.globalGraphPath', 'lib/data/geo880/geo880.kg'),
957+
# Verbosity
958+
letDefault(:verbose, 0),
959+
sel(:verbose,
960+
l(),
961+
l(
962+
o('showRules'),
963+
o('Parser.verbose', 2),
964+
o('JoinFn.verbose', 3),
965+
o('JoinFn.showTypeCheckFailures'),
966+
nil),
967+
nil),
968+
# Language Analyzer
969+
l(o('LanguageAnalyzer', 'corenlp.CoreNLPAnalyzer'), o('annotators', *'tokenize ssplit pos lemma ner'.split)),
970+
# Regularization
971+
letDefault(:l1, 0),
972+
sel(:l1,
973+
l(),
974+
l(o('Params.l1Reg','lazy'), o('Params.l1RegCoeff', '3e-5')),
975+
l(o('Params.l1Reg','lazy'), selo(nil, 'Params.l1RegCoeff', 0, 0.00001, 0.0001, 0.001, 0.01)),
976+
nil),
977+
# Features
978+
letDefault(:feat, 'freebase'),
979+
sel(:feat, {
980+
'none' => l(), # No features (random)
981+
'freebase' => l(
982+
o('FeatureExtractor.featureDomains', 'rule opCount constant whType span lemmaAndBinaries denotation lexAlign joinPos skipPos'.split),
983+
# o('FeatureExtractor.featureDomains', 'rule opCount constant whType lemmaAndBinaries denotation lexAlign joinPos skipPos'.split),
984+
nil),
985+
}),
986+
nil) })
921987

922988
############################################################
923989

src/edu/stanford/nlp/sempre/ContextValue.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public ContextValue(String user, DateValue date, List<Exchange> exchanges) {
6868
}
6969

7070
public ContextValue(KnowledgeGraph graph) {
71-
this(null, null, null, graph);
71+
this(null, null, new ArrayList(), graph);
7272
}
7373

7474
// Example:
@@ -107,8 +107,11 @@ public LispTree toLispTree() {
107107
tree.addChild(LispTree.proto.newList("user", user));
108108
if (date != null)
109109
tree.addChild(date.toLispTree());
110+
// When logging examples, logging the entire graph takes too much screen space.
111+
// I don't think that we ever deserialize a graph from a serialized context,
112+
// so this should be fine.
110113
if (graph != null)
111-
tree.addChild(graph.toLispTree());
114+
tree.addChild(graph.toShortLispTree());
112115
for (Exchange e : exchanges)
113116
tree.addChild(LispTree.proto.newList("exchange", e.toLispTree()));
114117
return tree;

src/edu/stanford/nlp/sempre/Dataset.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ public static class Options {
3838

3939
@Option(gloss = "Only keep examples which have at most this number of tokens")
4040
public int maxTokens = Integer.MAX_VALUE;
41+
42+
@Option(gloss = "Path to a knowledge graph that will be uploaded as global context")
43+
public String globalGraphPath;
4144
}
4245

4346
public static Options opts = new Options();
@@ -96,10 +99,22 @@ public void readFromPathPairs(List<Pair<String, String>> pathPairs) {
9699
return;
97100
}
98101
}
99-
100102
readLispTreeFromPathPairs(pathPairs);
103+
updateGlobalContext();
104+
}
105+
106+
private void updateGlobalContext() {
107+
if (opts.globalGraphPath != null) {
108+
KnowledgeGraph graph = NaiveKnowledgeGraph.fromFile(opts.globalGraphPath);
109+
for (String group : allExamples.keySet()) {
110+
for (Example ex : allExamples.get(group)) {
111+
ex.setContext(new ContextValue(graph));
112+
}
113+
}
114+
}
101115
}
102116

117+
103118
private void readJsonFromPathPairs(List<Pair<String, String>> pathPairs) {
104119
List<GroupInfo> groups = Lists.newArrayListWithCapacity(pathPairs.size());
105120
for (Pair<String, String> pathPair : pathPairs) {

src/edu/stanford/nlp/sempre/FeatureExtractor.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,10 +211,12 @@ void conjoinLemmaAndBinary(Example ex, Derivation deriv) {
211211
List<String> nonEntityLemmas = new LinkedList<>();
212212
extractNonEntityLemmas(ex, deriv, nonEntityLemmas);
213213
List<String> binaries = extractBinaries(deriv.formula);
214-
String binariesStr = Joiner.on('_').join(binaries);
215-
for (String nonEntityLemma : nonEntityLemmas) {
216-
deriv.addFeature("lemmaAndBinaries", "nonEntitylemmas=" + nonEntityLemma +
217-
",binaries=" + binariesStr);
214+
if (!binaries.isEmpty()) {
215+
String binariesStr = Joiner.on('_').join(binaries);
216+
for (String nonEntityLemma : nonEntityLemmas) {
217+
deriv.addFeature("lemmaAndBinaries", "nonEntitylemmas=" + nonEntityLemma +
218+
",binaries=" + binariesStr);
219+
}
218220
}
219221
}
220222

src/edu/stanford/nlp/sempre/KnowledgeGraph.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ public static List<Pair<Value, Value>> getReversedPairs(Collection<Pair<Value, V
8585
// ============================================================
8686

8787
public abstract LispTree toLispTree();
88+
public abstract LispTree toShortLispTree();
8889
@Override public String toString() { return toLispTree().toString(); }
8990

9091
/** Return all y such that x in firsts and (x,r,y) in graph */

src/edu/stanford/nlp/sempre/NaiveKnowledgeGraph.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,16 @@ public LispTree toLispTree() {
198198
}
199199
return tree;
200200
}
201+
202+
@Override
203+
public LispTree toShortLispTree() {
204+
if (triples.size() > 1000) {
205+
LispTree tree = LispTree.proto.newList();
206+
tree.addChild("graph");
207+
tree.addChild("NaiveKnowledgeGraph");
208+
tree.addChild(("TooManyTriples"));
209+
return tree;
210+
}
211+
return toLispTree();
212+
}
201213
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package edu.stanford.nlp.sempre.geo880;
2+
3+
import edu.stanford.nlp.sempre.SemType;
4+
import edu.stanford.nlp.sempre.SemTypeHierarchy;
5+
import edu.stanford.nlp.sempre.TypeLookup;
6+
import fig.basic.IOUtils;
7+
import fig.basic.Option;
8+
import fig.basic.LogInfo;
9+
10+
import java.io.IOException;
11+
import java.util.HashSet;
12+
import java.util.Set;
13+
14+
/**
15+
* Type lookup for the geo880 domain, Mostly for distinguishing locations and numbers.
16+
* We also use a type hierarchy provided by a file to match |location.us_state| and |location.location| etc.
17+
* Created by joberant on 05/12/2016.
18+
*/
19+
public class Geo880TypeLookup implements TypeLookup{
20+
public static class Options {
21+
@Option(gloss = "Verbosity") public int verbose = 0;
22+
@Option(gloss = "A path to a file that specified the type hierarchy.")
23+
public String typeHierarchyPath;
24+
25+
}
26+
public static Options opts = new Options();
27+
public static final String LOCATION = "fb:location.location";
28+
public static final String CITY = "fb:location.citytown";
29+
public static final String STATE = "fb:location.us_state";
30+
public static final String RIVER = "fb:location.river";
31+
public static final String LAKE = "fb:location.lake";
32+
public static final String MOUNTAIN = "fb:location.mountain";
33+
public static final String COUNTRY = "fb:location.country";
34+
35+
public Geo880TypeLookup() {
36+
SemTypeHierarchy semTypeHierarchy = SemTypeHierarchy.singleton;
37+
if (opts.typeHierarchyPath != null) {
38+
try {
39+
for (String line : IOUtils.readLines(opts.typeHierarchyPath)) {
40+
String[] tokens = line.split("\\s+");
41+
42+
// Check the file only contains relations about supertypes.
43+
assert tokens[1].endsWith("included_types");
44+
semTypeHierarchy.addSupertype(tokens[0], tokens[0]);
45+
semTypeHierarchy.addSupertype(tokens[2], tokens[2]);
46+
semTypeHierarchy.addSupertype(tokens[0], tokens[2]);
47+
}
48+
} catch (IOException e) {
49+
e.printStackTrace();
50+
throw new RuntimeException("Could not read lines from: " + opts.typeHierarchyPath);
51+
}
52+
}
53+
}
54+
55+
@Override
56+
public SemType getEntityType(String entity) {
57+
// Entites are of the form fb:state.florida.
58+
int colonIndex = entity.indexOf(':');
59+
int dotIndex = entity.indexOf('.');
60+
String type = entity.substring(colonIndex+1, dotIndex);
61+
62+
if (type.equals("place")) {
63+
type = LOCATION;
64+
}
65+
else if (type.equals("city")) {
66+
type = CITY;
67+
}
68+
else if (type.equals("state")) {
69+
type = STATE;
70+
}
71+
else if (type.equals("river")) {
72+
type = RIVER;
73+
}
74+
else if (type.equals("lake")) {
75+
type = LAKE;
76+
}
77+
else if (type.equals("mountain")) {
78+
type = MOUNTAIN;
79+
}
80+
else if (type.equals("country")) {
81+
type = COUNTRY;
82+
}
83+
else {
84+
throw new RuntimeException("Illegal entity: " + entity);
85+
}
86+
SemType result = SemType.newUnionSemType(type);
87+
if (opts.verbose >= 1) {
88+
LogInfo.logs("Entity=%s, Type=%s", entity, result);
89+
}
90+
return result;
91+
}
92+
93+
@Override
94+
public SemType getPropertyType(String property) {
95+
// Properties are of the form fb:location.location.population.
96+
String arg1 = property.substring(0, property.lastIndexOf('.'));
97+
String suffix = property.substring(property.lastIndexOf('.') + 1);
98+
String arg2 = LOCATION;
99+
if (suffix.equals("density") || suffix.equals("elevation") ||
100+
suffix.equals("population") || suffix.equals("size") ||
101+
suffix.equals("area") || suffix.equals("length")) {
102+
arg2 = "fb:type.number";
103+
}
104+
SemType result = SemType.newFuncSemType(arg2, arg1);
105+
if (opts.verbose >= 1) {
106+
LogInfo.logs("Property=%s, Type=%s", property, result);
107+
}
108+
return result;
109+
}
110+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package edu.stanford.nlp.sempre.geo880;
2+
3+
import edu.stanford.nlp.sempre.*;
4+
import edu.stanford.nlp.sempre.tables.StringNormalizationUtils;
5+
import fig.basic.LogInfo;
6+
7+
import java.util.List;
8+
9+
/**
10+
* This is only used because the data does not mention when a city is in the usa, but
11+
* the kg returns usa, and we want to use exact match, so we add this logic here.
12+
* Created by joberant on 03/12/2016.
13+
*/
14+
public class Geo880ValueEvaluator implements ValueEvaluator {
15+
16+
public double getCompatibility(Value target, Value pred) {
17+
List<Value> targetList = ((ListValue) target).values;
18+
if (!(pred instanceof ListValue)) return 0;
19+
List<Value> predList = ((ListValue) pred).values;
20+
21+
// In geo880, if we return that something is contained in a state, there is no need to return fb:country.usa
22+
Value toDelete = null;
23+
if (predList.size() > 1 && predList.get(0) instanceof NameValue) {
24+
for (Value v: predList) {
25+
String id = ((NameValue) v).id;
26+
if (id.equals("fb:country.usa")) {
27+
toDelete = v;
28+
break;
29+
}
30+
}
31+
}
32+
if (toDelete != null) {
33+
predList.remove(toDelete);
34+
}
35+
36+
if (targetList.size() != predList.size()) return 0;
37+
38+
for (Value targetValue : targetList) {
39+
boolean found = false;
40+
for (Value predValue : predList) {
41+
if (getItemCompatibility(targetValue, predValue)) {
42+
found = true;
43+
break;
44+
}
45+
}
46+
if (!found) return 0;
47+
}
48+
return 1;
49+
}
50+
51+
// ============================================================
52+
// Item Compatibility
53+
// ============================================================
54+
55+
// Compare one element of the list.
56+
protected boolean getItemCompatibility(Value target, Value pred) {
57+
if (pred instanceof ErrorValue) return false; // Never award points for error
58+
if (pred == null) {
59+
LogInfo.warning("Predicted value is null!");
60+
return false;
61+
}
62+
63+
if (target instanceof DescriptionValue) {
64+
String targetText = ((DescriptionValue) target).value;
65+
if (pred instanceof NameValue) {
66+
// Just has to match the description
67+
String predText = ((NameValue) pred).description;
68+
if (predText == null) predText = "";
69+
return targetText.equals(predText);
70+
}
71+
} else if (target instanceof NumberValue) {
72+
NumberValue targetNumber = (NumberValue) target;
73+
if (pred instanceof NumberValue) {
74+
return compareNumberValues(targetNumber, (NumberValue) pred);
75+
}
76+
}
77+
78+
return target.equals(pred);
79+
}
80+
81+
protected boolean compareNumberValues(NumberValue target, NumberValue pred) {
82+
return Math.abs(target.value - pred.value) < 1e-6;
83+
}
84+
85+
}

0 commit comments

Comments
 (0)