Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.util.Quadruple;
import edu.stanford.nlp.util.Triple;

/**
Expand All @@ -26,12 +27,20 @@ public class NodeAttributes {
// String, String, Boolean: key, value, negated
private List<Triple<String, String, Boolean>> attributes;
private Set<String> positiveAttributes;
// Some annotations, especially morpho features (CoreAnnotations.CoNLLUFeats)
// are represented by Maps. In some cases it will be easier to search
// for individual elements of that map rather than turn the map into a string
// and search on its contents that way. This is especially true since there
// is no guarantee the map will be in a consistent order.
// String, String, String, Boolean: node attribute for a map (such as CoNLLUFeats), key in that map, value to match, negated?
private List<Quadruple<String, String, String, Boolean>> contains;

/**
 * Creates an empty set of node attributes: the root and empty flags are
 * both false, and the attribute, positive-attribute and contains
 * collections all start out with no entries.
 */
public NodeAttributes() {
  // collections first, then the flags; the assignments are independent
  this.attributes = new ArrayList<>();
  this.positiveAttributes = new HashSet<>();
  this.contains = new ArrayList<>();

  this.root = false;
  this.empty = false;
}

public void setRoot(boolean root) {
Expand Down Expand Up @@ -60,7 +69,15 @@ public void setAttribute(String key, String value, boolean negated) {
attributes.add(new Triple(key, value, negated));
}

/**
 * Records a single key/value condition to be checked inside a
 * map-valued node attribute.
 *
 * @param annotation node attribute for a map (such as CoNLLUFeats)
 * @param key key in that map
 * @param value value to match
 * @param negated whether the condition is negated
 */
public void addContains(String annotation, String key, String value, Boolean negated) {
  // diamond rather than a raw Quadruple, so the element type is checked by the compiler
  contains.add(new Quadruple<>(annotation, key, value, negated));
}

/**
 * Returns a read-only view of the (key, value, negated) attribute triples.
 */
public List<Triple<String, String, Boolean>> attributes() {
  final List<Triple<String, String, Boolean>> readOnlyView = Collections.unmodifiableList(attributes);
  return readOnlyView;
}

/**
 * Returns a read-only view of the (annotation, key, value, negated)
 * entries added via addContains.
 */
public List<Quadruple<String, String, String, Boolean>> contains() {
  final List<Quadruple<String, String, String, Boolean>> readOnlyView = Collections.unmodifiableList(contains);
  return readOnlyView;
}
}
108 changes: 91 additions & 17 deletions src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.stanford.nlp.ling.AnnotationLookup;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Quadruple;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;

Expand All @@ -32,6 +34,11 @@ public class NodePattern extends SemgrexPattern {
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
*/
private final List<Attribute> attributes;
/**
* Attributes which represent Maps (e.g., CoNLLUFeats),
* for which only partial matches are necessary
*/
private final List<Pair<String, Attribute>> partialAttributes;
private final boolean isRoot;
private final boolean isLink;
private final boolean isEmpty;
Expand All @@ -58,6 +65,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
// order the attributes so that the pattern stays the same when
// printing a compiled pattern
this.attributes = new ArrayList<>();
// same with partial attributes
this.partialAttributes = new ArrayList<>();

descString = "{";
for (Triple<String, String, Boolean> entry : attrs.attributes()) {
if (!descString.equals("{"))
Expand All @@ -70,23 +80,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
if (value.equals("__")) {
attributes.add(new Attribute(key, true, true, negated));
} else if (value.matches("/.*/")) {
boolean isRegexp = false;
for (int i = 1; i < value.length() - 1; ++i) {
char chr = value.charAt(i);
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
isRegexp = true;
break;
}
}
String patternContent = value.substring(1, value.length() - 1);
if (isRegexp) {
attributes.add(new Attribute(key,
Pattern.compile(patternContent),
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
negated));
} else {
attributes.add(new Attribute(key, patternContent, patternContent, negated));
}
attributes.add(buildRegexAttribute(key, value, negated));
} else { // raw description
attributes.add(new Attribute(key, value, value, negated));
}
Expand All @@ -98,6 +92,37 @@ public NodePattern(GraphRelation r, boolean negDesc,
}
}

// Build one partial attribute per "contains" entry.  Each entry holds:
// annotation (a Map-valued node attribute), key in that map,
// value to match, and whether the match is negated.
for (Quadruple<String, String, String, Boolean> entry : attrs.contains()) {
  String annotation = entry.first();
  String key = entry.second();
  String value = entry.third();
  boolean negated = entry.fourth();

  // A single key/value lookup is only possible if the annotation's value type is a Map
  Class<?> clazz = AnnotationLookup.getValueType(AnnotationLookup.toCoreKey(annotation));
  boolean isMap = clazz != null && Map.class.isAssignableFrom(clazz);
  if (!isMap) {
    throw new SemgrexParseException("Cannot process a single key/value from annotation " + annotation + " as it is not a Map");
  }

  final Attribute attr;
  // Add the attributes for this key
  if (value.equals("__")) {
    attr = new Attribute(key, true, true, negated);
  } else if (value.matches("/.*/")) {
    attr = buildRegexAttribute(key, value, negated);
  } else { // raw description
    attr = new Attribute(key, value, value, negated);
  }
  partialAttributes.add(new Pair<>(annotation, attr));

  if (!descString.equals("{")) {
    descString += ";";
  }
  // Negated entries are written with "!:" so the negation is not
  // dropped from the description string (previously the separator
  // was computed but ":" was always written).
  String separator = negated ? "!:" : ":";
  // TODO: the descString might look nicer if multiple contains
  // for the same attribute were collapsed into the same map
  descString += (annotation + ":{" + key + separator + value + "}");
}

if (attrs.root()) {
if (!descString.equals("{"))
descString += ";";
Expand All @@ -118,6 +143,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
this.variableGroups = Collections.unmodifiableList(variableGroups);
}

/**
 * Builds the Attribute for a value whose first and last characters are
 * delimiters (callers pass values of the form /.../).
 * <br>
 * If the text between the delimiters consists only of the characters
 * A-Z, a-z and 0-9, it is kept as a plain string.  Otherwise it is
 * compiled into two Patterns: one as written, and one with
 * CASE_INSENSITIVE and UNICODE_CASE set.
 */
private Attribute buildRegexAttribute(String key, String value, boolean negated) {
  final String body = value.substring(1, value.length() - 1);

  // scan for the first character which is not an ASCII letter or digit
  boolean plainText = true;
  for (char c : body.toCharArray()) {
    final boolean upper = c >= 'A' && c <= 'Z';
    final boolean lower = c >= 'a' && c <= 'z';
    final boolean digit = c >= '0' && c <= '9';
    if (!upper && !lower && !digit) {
      plainText = false;
      break;
    }
  }

  if (plainText) {
    return new Attribute(key, body, body, negated);
  }

  final Pattern caseSensitive = Pattern.compile(body);
  final Pattern caseInsensitive = Pattern.compile(body, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
  return new Attribute(key, caseSensitive, caseInsensitive, negated);
}

private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue) {
if (nodeValue == null) {
// treat non-existent attributes as having matched a negated expression
Expand Down Expand Up @@ -189,6 +238,31 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
return negDesc;
}
}
// Check the partial attributes: each one looks up a single key inside
// a Map-valued annotation on the node and matches that entry's value.
for (Pair<String, Attribute> partialAttribute : partialAttributes) {
  String annotation = partialAttribute.first();
  Attribute attr = partialAttribute.second();

  Class clazz = Env.lookupAnnotationKey(env, annotation);
  Object rawmap = node.get(clazz);
  final String nodeValue;
  if (rawmap == null) {
    // no such annotation on this node; checkMatch decides what a null value means
    nodeValue = null;
  } else {
    if (!(rawmap instanceof Map)) {
      throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!");
    }
    // wildcard cast instead of a raw Map: only get(Object) is needed,
    // so nothing has to be assumed about the key or value types
    Map<?, ?> map = (Map<?, ?>) rawmap;

    // TODO: allow for regex match on the keys?
    Object value = map.get(attr.key);
    nodeValue = (value == null) ? null : value.toString();
  }

  boolean matches = checkMatch(attr, ignoreCase, nodeValue);
  if (!matches) {
    return negDesc;
  }
}

// System.out.println("matches");
// System.out.println("");
return !negDesc;
Expand Down
Loading