Skip to content

Commit 4bfb0de

Browse files
committed
Allow for uniq in Semgrex over edge names (which checks based on the relation type)
1 parent 1edb99a commit 4bfb0de

File tree

4 files changed

+64
-15
lines changed

4 files changed

+64
-15
lines changed

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ class SemgrexParser implements SemgrexParserConstants {
2424
// keep track of which regex variable groups we've already seen
2525
// useful for allowing uniq to operate on
2626
private Set<String> knownVarGroups = Generics.newHashSet();
27+
// keep track of which edges we've already seen
28+
// useful for allowing uniq to operate on
29+
private Set<String> knownEdges = Generics.newHashSet();
2730

2831
private static final Redwood.RedwoodChannels log = Redwood.channels(SemgrexParser.class);
2932
private boolean deprecatedAmp = false;
@@ -125,15 +128,15 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
125128
uniqKeys.add(nextIdentifier.image);
126129
}
127130
for (String key : uniqKeys) {
128-
if (!knownVariables.contains(key) && !knownVarGroups.contains(key)) {
129-
{if (true) throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which does not exist in the pattern (as a node or regex)");}
131+
if (!knownVariables.contains(key) && !knownVarGroups.contains(key) && !knownEdges.contains(key)) {
132+
{if (true) throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which does not exist in the pattern (as a node, regex, or edge)");}
130133
}
131-
if (knownVariables.contains(key) && knownVarGroups.contains(key)) {
132-
{if (true) throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which is very confusing, as it is both a node and a regex. Please rename one of them");}
134+
if ((knownVariables.contains(key) && (knownVarGroups.contains(key) || knownEdges.contains(key))) ||
135+
(knownVarGroups.contains(key) && knownEdges.contains(key))) {
136+
{if (true) throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key +
137+
" which is very confusing, as it is ambiguous between node, regex, and edge. Please rename one of them");}
133138
}
134139
}
135-
// TODO: can error check that the keys are unique between node and edge names
136-
// that might require keeping edge names in a known set
137140
// TODO: edge names might need some upgrades anyway - shouldn't name them under negation, for example
138141
node = new UniqPattern(node, uniqKeys);
139142
break;
@@ -428,7 +431,10 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
428431
jj_consume_token(-1);
429432
throw new ParseException();
430433
}
431-
if (numArg == null && numArg2 == null) {
434+
if (edgeName != null) {
435+
knownEdges.add(edgeName.image);
436+
}
437+
if (numArg == null && numArg2 == null) {
432438
reln = GraphRelation.getRelation(rel != null ? rel.image : null,
433439
relnType != null ? relnType.image : null,
434440
name != null ? name.image : null,

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ class SemgrexParser {
3030
// keep track of which regex variable groups we've already seen
3131
// useful for allowing uniq to operate on
3232
private Set<String> knownVarGroups = Generics.newHashSet();
33+
// keep track of which edges we've already seen
34+
// useful for allowing uniq to operate on
35+
private Set<String> knownEdges = Generics.newHashSet();
3336

3437
private static final Redwood.RedwoodChannels log = Redwood.channels(SemgrexParser.class);
3538
private boolean deprecatedAmp = false;
@@ -119,15 +122,15 @@ SemgrexPattern Root() : {
119122
"::" <UNIQ> { uniqKeys = new ArrayList<>(); } (nextIdentifier = identifier() { uniqKeys.add(nextIdentifier.image); })*
120123
{
121124
for (String key : uniqKeys) {
122-
if (!knownVariables.contains(key) && !knownVarGroups.contains(key)) {
123-
throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which does not exist in the pattern (as a node or regex)");
125+
if (!knownVariables.contains(key) && !knownVarGroups.contains(key) && !knownEdges.contains(key)) {
126+
throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which does not exist in the pattern (as a node, regex, or edge)");
124127
}
125-
if (knownVariables.contains(key) && knownVarGroups.contains(key)) {
126-
throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which is very confusing, as it is both a node and a regex. Please rename one of them");
128+
if ((knownVariables.contains(key) && (knownVarGroups.contains(key) || knownEdges.contains(key))) ||
129+
(knownVarGroups.contains(key) && knownEdges.contains(key))) {
130+
throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key +
131+
" which is very confusing, as it is ambiguous between node, regex, and edge. Please rename one of them");
127132
}
128133
}
129-
// TODO: can error check that the keys are unique between node and edge names
130-
// that might require keeping edge names in a known set
131134
// TODO: edge names might need some upgrades anyway - shouldn't name them under negation, for example
132135
node = new UniqPattern(node, uniqKeys);
133136
}
@@ -229,6 +232,9 @@ SemgrexPattern Relation() : {
229232
| (rel = <ALIGNRELN>))
230233

231234
{
235+
if (edgeName != null) {
236+
knownEdges.add(edgeName.image);
237+
}
232238
if (numArg == null && numArg2 == null) {
233239
reln = GraphRelation.getRelation(rel != null ? rel.image : null,
234240
relnType != null ? relnType.image : null,

src/edu/stanford/nlp/semgraph/semgrex/UniqPattern.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ public UniqPattern(SemgrexPattern child, List<String> keys) {
3232
}
3333

3434
private String getKey(SemgrexMatch match, String key) {
35-
// TODO: could also do edge names
3635
IndexedWord node = match.getNode(key);
3736
if (node != null) {
3837
return node.value();
@@ -41,6 +40,10 @@ private String getKey(SemgrexMatch match, String key) {
4140
if (varString != null) {
4241
return varString;
4342
}
43+
SemanticGraphEdge edge = match.getEdge(key);
44+
if (edge != null) {
45+
return edge.getRelation().toString();
46+
}
4447
return null;
4548
}
4649

test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1525,7 +1525,7 @@ public void testBrokenUniq() {
15251525
}
15261526

15271527
/**
1528-
* Test that an illegal uniq expression throws an exception when both node and regex are named
1528+
* Test that an illegal uniq expression throws an exception when at least two of node, regex, and edge names are the same
15291529
*<br>
15301530
* Specifically, the expectation is for a SemgrexParseException
15311531
*/
@@ -1537,6 +1537,22 @@ public void testOverlappingUniq() {
15371537
} catch (SemgrexParseException e) {
15381538
// yay
15391539
}
1540+
1541+
try {
1542+
String pattern = "{word:__#1%foo} <=foo {} :: uniq foo";
1543+
SemgrexPattern semgrex = SemgrexPattern.compile(pattern);
1544+
throw new RuntimeException("This expression should fail because the edge name and regex name overlap");
1545+
} catch (SemgrexParseException e) {
1546+
// yay
1547+
}
1548+
1549+
try {
1550+
String pattern = "{word:__}=foo <=foo {} :: uniq foo";
1551+
SemgrexPattern semgrex = SemgrexPattern.compile(pattern);
1552+
throw new RuntimeException("This expression should fail because the node name and edge name overlap");
1553+
} catch (SemgrexParseException e) {
1554+
// yay
1555+
}
15401556
}
15411557

15421558
/**
@@ -1601,6 +1617,24 @@ public void testBatchUniq() {
16011617
assertEquals(BATCH_PARSES[2], matches.get(1).first().get(CoreAnnotations.TextAnnotation.class));
16021618
assertEquals(1, matches.get(1).second().size());
16031619
assertEquals("bar", matches.get(1).second().get(0).getVariableString("x"));
1620+
1621+
// test the uniq operator on an edge
1622+
semgrex = SemgrexPattern.compile("{} !< {} >=edge {} :: uniq edge");
1623+
matches = semgrex.matchSentences(sentences, false);
1624+
assertEquals(3, matches.size());
1625+
// sentence 0 should match because the root has a child with nmod
1626+
assertEquals(BATCH_PARSES[0], matches.get(0).first().get(CoreAnnotations.TextAnnotation.class));
1627+
assertEquals(1, matches.get(0).second().size());
1628+
assertEquals("nmod", matches.get(0).second().get(0).getEdge("edge").getRelation().toString());
1629+
// sentence 1 should match because the root has a child with obj
1630+
assertEquals(BATCH_PARSES[1], matches.get(1).first().get(CoreAnnotations.TextAnnotation.class));
1631+
assertEquals(1, matches.get(1).second().size());
1632+
assertEquals("obj", matches.get(1).second().get(0).getEdge("edge").getRelation().toString());
1633+
// sentence 2 should match because the root has a child with compound
1634+
assertEquals(BATCH_PARSES[2], matches.get(2).first().get(CoreAnnotations.TextAnnotation.class));
1635+
assertEquals(1, matches.get(2).second().size());
1636+
assertEquals("compound", matches.get(2).second().get(0).getEdge("edge").getRelation().toString());
1637+
// sentence 3 should not match because both nmod and obj were already seen
16041638
}
16051639

16061640
public void testRegexVariableGroups() {

0 commit comments

Comments
 (0)