diff --git a/src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java b/src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java index 6891f7556c..ae9efbd802 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Set; +import edu.stanford.nlp.util.Quadruple; import edu.stanford.nlp.util.Triple; /** @@ -26,12 +27,20 @@ public class NodeAttributes { // String, String, Boolean: key, value, negated private List> attributes; private Set positiveAttributes; + // Some annotations, especially morpho features (CoreAnnotations.CoNLLUFeats) + // are represented by Maps. In some cases it will be easier to search + // for individual elements of that map rather than turn the map into a string + // and search on its contents that way. This is especially true since there + // is no guarantee the map will be in a consistent order. + // String, String, String, Boolean: node attribute for a map (such as CoNLLUFeats), key in that map, value to match, negated?
+ private List> contains; public NodeAttributes() { root = false; empty = false; attributes = new ArrayList<>(); positiveAttributes = new HashSet<>(); + contains = new ArrayList<>(); } public void setRoot(boolean root) { @@ -60,7 +69,15 @@ public void setAttribute(String key, String value, boolean negated) { attributes.add(new Triple(key, value, negated)); } + public void addContains(String annotation, String key, String value, Boolean negated) { + contains.add(new Quadruple(annotation, key, value, negated)); + } + public List> attributes() { return Collections.unmodifiableList(attributes); } + + public List> contains() { + return Collections.unmodifiableList(contains); + } } diff --git a/src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java b/src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java index 5f0eab72a1..6f7cb4c9f0 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java @@ -8,10 +8,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import edu.stanford.nlp.ling.AnnotationLookup; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.util.Pair; +import edu.stanford.nlp.util.Quadruple; import edu.stanford.nlp.util.Triple; import edu.stanford.nlp.util.logging.Redwood; @@ -32,6 +34,11 @@ public class NodePattern extends SemgrexPattern { * Otherwise, the type will be a Pattern, and you must use Pattern.matches(). 
*/ private final List attributes; + /** + * Attributes which represent Maps (eg CoNLLUFeats) + * and only partial matches are necessary + */ + private final List> partialAttributes; private final boolean isRoot; private final boolean isLink; private final boolean isEmpty; @@ -58,6 +65,9 @@ public NodePattern(GraphRelation r, boolean negDesc, // order the attributes so that the pattern stays the same when // printing a compiled pattern this.attributes = new ArrayList<>(); + // same with partial attributes + this.partialAttributes = new ArrayList<>(); + descString = "{"; for (Triple entry : attrs.attributes()) { if (!descString.equals("{")) @@ -70,23 +80,7 @@ public NodePattern(GraphRelation r, boolean negDesc, if (value.equals("__")) { attributes.add(new Attribute(key, true, true, negated)); } else if (value.matches("/.*/")) { - boolean isRegexp = false; - for (int i = 1; i < value.length() - 1; ++i) { - char chr = value.charAt(i); - if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) { - isRegexp = true; - break; - } - } - String patternContent = value.substring(1, value.length() - 1); - if (isRegexp) { - attributes.add(new Attribute(key, - Pattern.compile(patternContent), - Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE), - negated)); - } else { - attributes.add(new Attribute(key, patternContent, patternContent, negated)); - } + attributes.add(buildRegexAttribute(key, value, negated)); } else { // raw description attributes.add(new Attribute(key, value, value, negated)); } @@ -98,6 +92,37 @@ public NodePattern(GraphRelation r, boolean negDesc, } } + for (Quadruple entry : attrs.contains()) { + String annotation = entry.first(); + String key = entry.second(); + String value = entry.third(); + boolean negated = entry.fourth(); + + Class clazz = AnnotationLookup.getValueType(AnnotationLookup.toCoreKey(annotation)); + boolean isMap = clazz != null && Map.class.isAssignableFrom(clazz); + if 
(!isMap) { + throw new SemgrexParseException("Cannot process a single key/value from annotation " + annotation + " as it is not a Map"); + } + + final Attribute attr; + // Add the attributes for this key + if (value.equals("__")) { + attr = new Attribute(key, true, true, negated); + } else if (value.matches("/.*/")) { + attr = buildRegexAttribute(key, value, negated); + } else { // raw description + attr = new Attribute(key, value, value, negated); + } + partialAttributes.add(new Pair<>(annotation, attr)); + + if (!descString.equals("{")) + descString += ";"; + String separator = negated ? "!:" : ":"; + // TODO: the descString might look nicer if multiple contains + // for the same attribute were collapsed into the same map + descString += (annotation + ":{" + key + separator + value + "}"); + } + if (attrs.root()) { if (!descString.equals("{")) descString += ";"; @@ -118,6 +143,30 @@ public NodePattern(GraphRelation r, boolean negDesc, this.variableGroups = Collections.unmodifiableList(variableGroups); } + /** + * Tests the value to see if it's really a regex, or just a string wrapped in regex.
+ * Return an Attribute which matches this expression + */ + private Attribute buildRegexAttribute(String key, String value, boolean negated) { + boolean isRegexp = false; + for (int i = 1; i < value.length() - 1; ++i) { + char chr = value.charAt(i); + if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) { + isRegexp = true; + break; + } + } + String patternContent = value.substring(1, value.length() - 1); + if (isRegexp) { + return new Attribute(key, + Pattern.compile(patternContent), + Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE), + negated); + } else { + return new Attribute(key, patternContent, patternContent, negated); + } + } + private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue) { if (nodeValue == null) { // treat non-existent attributes has having matched a negated expression @@ -189,6 +238,31 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i return negDesc; } } + for (Pair partialAttribute : partialAttributes) { + String annotation = partialAttribute.first(); + Attribute attr = partialAttribute.second(); + + Class clazz = Env.lookupAnnotationKey(env, annotation); + Object rawmap = node.get(clazz); + final String nodeValue; + if (rawmap == null) { + nodeValue = null; + } else { + if (!(rawmap instanceof Map)) + throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!"); + Map map = (Map) rawmap; + + // TODO: allow for regex match on the keys? + Object value = map.get(attr.key); + nodeValue = (value == null) ? 
null : value.toString(); + } + + boolean matches = checkMatch(attr, ignoreCase, nodeValue); + if (!matches) { + return negDesc; + } + } + // System.out.println("matches"); // System.out.println(""); return !negDesc; diff --git a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java index 110925af97..0ebfb5b2d5 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java @@ -65,7 +65,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th case 11: case 15: case 17: - case 23:{ + case 26:{ node = SubNode(GraphRelation.ROOT); children.add(node); label_1: @@ -135,7 +135,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th } case 15: case 17: - case 23:{ + case 26:{ result = ModNode(r); switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case RELATION: @@ -397,7 +397,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case 15: case 17: - case 23:{ + case 26:{ node = ModNode(reln); break; } @@ -454,7 +454,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th case 14: case 15: case 17: - case 23:{ + case 26:{ ; break; } @@ -485,7 +485,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th boolean startUnderNeg; switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case 17: - case 23:{ + case 26:{ child = Child(r); break; } @@ -512,7 +512,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th child = NodeDisj(r); break; } - case 23:{ + case 26:{ child = Description(r); break; } @@ -526,43 +526,143 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th } final public void AddAttribute(NodeAttributes attributes) throws ParseException {Token attr = null; + Token key = null; Token value = null; Token attrType 
= null; + boolean negated = false; switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case IDENTIFIER:{ attr = jj_consume_token(IDENTIFIER); switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case 10:{ - attrType = jj_consume_token(10); - break; - } + case 10: case 22:{ - attrType = jj_consume_token(22); - break; + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case 10:{ + attrType = jj_consume_token(10); + break; + } + case 22:{ + attrType = jj_consume_token(22); + break; + } + default: + jj_la1[23] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); } - default: - jj_la1[23] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case IDENTIFIER:{ - value = jj_consume_token(IDENTIFIER); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case IDENTIFIER:{ + value = jj_consume_token(IDENTIFIER); + break; + } + case REGEX:{ + value = jj_consume_token(REGEX); + break; + } + default: + jj_la1[24] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +if (attr != null && value != null) { + negated = attrType.image.equals("!:"); + attributes.setAttribute(attr.image, value.image, negated); + } break; } - case REGEX:{ - value = jj_consume_token(REGEX); + case 23:{ + jj_consume_token(23); + key = jj_consume_token(IDENTIFIER); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case 10:{ + attrType = jj_consume_token(10); + break; + } + case 22:{ + attrType = jj_consume_token(22); + break; + } + default: + jj_la1[25] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case IDENTIFIER:{ + value = jj_consume_token(IDENTIFIER); + break; + } + case REGEX:{ + value = jj_consume_token(REGEX); + break; + } + default: + jj_la1[26] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +if (attr == null || key == null || value == null) { + {if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr + + " key=" + key + " 
value=" + value);} + } + negated = attrType.image.equals("!:"); + attributes.addContains(attr.image, key.image, value.image, negated); + label_6: + while (true) { + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case 24:{ + ; + break; + } + default: + jj_la1[27] = jj_gen; + break label_6; + } + jj_consume_token(24); + key = jj_consume_token(IDENTIFIER); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case 10:{ + attrType = jj_consume_token(10); + break; + } + case 22:{ + attrType = jj_consume_token(22); + break; + } + default: + jj_la1[28] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case IDENTIFIER:{ + value = jj_consume_token(IDENTIFIER); + break; + } + case REGEX:{ + value = jj_consume_token(REGEX); + break; + } + default: + jj_la1[29] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +if (attr == null || key == null || value == null) { + {if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr + + " key=" + key + " value=" + value);} + } + negated = attrType.image.equals("!:"); + attributes.addContains(attr.image, key.image, value.image, negated); + } + jj_consume_token(25); break; } default: - jj_la1[24] = jj_gen; + jj_la1[30] = jj_gen; jj_consume_token(-1); throw new ParseException(); } -if (attr != null && value != null) { - boolean negated = attrType.image.equals("!:"); - attributes.setAttribute(attr.image, value.image, negated); - } break; } case ROOT:{ @@ -576,7 +676,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th break; } default: - jj_la1[25] = jj_gen; + jj_la1[31] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -586,13 +686,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th boolean link = false; NodeAttributes attributes = new NodeAttributes(); NodePattern pat; - jj_consume_token(23); + jj_consume_token(26); switch 
((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case IDENTIFIER: case EMPTY: case ROOT:{ AddAttribute(attributes); - label_6: + label_7: while (true) { switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case 24:{ @@ -600,8 +700,8 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th break; } default: - jj_la1[26] = jj_gen; - break label_6; + jj_la1[32] = jj_gen; + break label_7; } jj_consume_token(24); AddAttribute(attributes); @@ -609,7 +709,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th break; } default: - jj_la1[27] = jj_gen; + jj_la1[33] = jj_gen; ; } jj_consume_token(25); @@ -629,7 +729,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th break; } default: - jj_la1[28] = jj_gen; + jj_la1[34] = jj_gen; ; } pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null); @@ -646,13 +746,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th public Token jj_nt; private int jj_ntk; private int jj_gen; - final private int[] jj_la1 = new int[29]; + final private int[] jj_la1 = new int[35]; static private int[] jj_la1_0; static { jj_la1_init_0(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0xd0,0x1000000,0xd0,0x200000,}; + jj_la1_0 = new int[] {0x400,0x4028808,0x3801c,0x3801c,0x4028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x4028800,0x2000,0x402c000,0x4000,0x4028000,0x4020000,0x400400,0x110,0x400400,0x110,0x1000000,0x400400,0x110,0xc00400,0xd0,0x1000000,0xd0,0x200000,}; } /** Constructor with InputStream. 
*/ @@ -666,7 +766,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 29; i++) jj_la1[i] = -1; + for (int i = 0; i < 35; i++) jj_la1[i] = -1; } /** Reinitialise. */ @@ -680,7 +780,7 @@ public void ReInit(java.io.InputStream stream, String encoding) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 29; i++) jj_la1[i] = -1; + for (int i = 0; i < 35; i++) jj_la1[i] = -1; } /** Constructor. */ @@ -690,7 +790,7 @@ public SemgrexParser(java.io.Reader stream) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 29; i++) jj_la1[i] = -1; + for (int i = 0; i < 35; i++) jj_la1[i] = -1; } /** Reinitialise. */ @@ -708,7 +808,7 @@ public void ReInit(java.io.Reader stream) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 29; i++) jj_la1[i] = -1; + for (int i = 0; i < 35; i++) jj_la1[i] = -1; } /** Constructor with generated Token Manager. */ @@ -717,7 +817,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 29; i++) jj_la1[i] = -1; + for (int i = 0; i < 35; i++) jj_la1[i] = -1; } /** Reinitialise. */ @@ -726,7 +826,7 @@ public void ReInit(SemgrexParserTokenManager tm) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 29; i++) jj_la1[i] = -1; + for (int i = 0; i < 35; i++) jj_la1[i] = -1; } private Token jj_consume_token(int kind) throws ParseException { @@ -777,12 +877,12 @@ private int jj_ntk_f() { /** Generate ParseException. 
*/ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[26]; + boolean[] la1tokens = new boolean[27]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; } - for (int i = 0; i < 29; i++) { + for (int i = 0; i < 35; i++) { if (jj_la1[i] == jj_gen) { for (int j = 0; j < 32; j++) { if ((jj_la1_0[i] & (1< (attrType = ":" | attrType = "!:") (value = | value = ) ) - { - if (attr != null && value != null) { - boolean negated = attrType.image.equals("!:"); - attributes.setAttribute(attr.image, value.image, negated); - } - }) + (attr = + (( (attrType = ":" | attrType = "!:") (value = | value = ) { + if (attr != null && value != null) { + negated = attrType.image.equals("!:"); + attributes.setAttribute(attr.image, value.image, negated); + } + }) + | + ( ":{" + ((key = ) (attrType = ":" | attrType = "!:") (value = | value = ) + { + if (attr == null || key == null || value == null) { + throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr + + " key=" + key + " value=" + value); + } + negated = attrType.image.equals("!:"); + attributes.addContains(attr.image, key.image, value.image, negated); + }) + ( ";" (key = ) (attrType = ":" | attrType = "!:") (value = | value = ) + { + if (attr == null || key == null || value == null) { + throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr + + " key=" + key + " value=" + value); + } + negated = attrType.image.equals("!:"); + attributes.addContains(attr.image, key.image, value.image, negated); + })* + "}" )) + ) | ( attr = { attributes.setRoot(true); } ) | diff --git a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java index 7a55891f0c..891073b9ff 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java @@ -55,9 +55,10 @@ interface 
SemgrexParserConstants { "\"~\"", "\"=\"", "\"!:\"", - "\"{\"", + "\":{\"", "\";\"", "\"}\"", + "\"{\"", }; } diff --git a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserTokenManager.java b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserTokenManager.java index e3fe4d9933..4433fbc369 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserTokenManager.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserTokenManager.java @@ -61,7 +61,8 @@ private int jjMoveStringLiteralDfa0_0(){ case 44: return jjStopAtPos(0, 19); case 58: - return jjStopAtPos(0, 10); + jjmatchedKind = 10; + return jjMoveStringLiteralDfa1_0(0x800000L); case 59: return jjStopAtPos(0, 24); case 61: @@ -75,7 +76,7 @@ private int jjMoveStringLiteralDfa0_0(){ case 93: return jjStopAtPos(0, 18); case 123: - return jjStopAtPos(0, 23); + return jjStopAtPos(0, 26); case 124: return jjStopAtPos(0, 13); case 125: @@ -98,6 +99,10 @@ private int jjMoveStringLiteralDfa1_0(long active0){ if ((active0 & 0x400000L) != 0L) return jjStopAtPos(1, 22); break; + case 123: + if ((active0 & 0x800000L) != 0L) + return jjStopAtPos(1, 23); + break; default : break; } @@ -358,7 +363,7 @@ else if (curChar < 128) public static final String[] jjstrLiteralImages = { "", null, null, "\100", null, null, "\43", "\44", null, "\12", "\72", "\50", "\51", "\174", "\46", "\41", "\77", "\133", "\135", "\54", "\176", "\75", "\41\72", -"\173", "\73", "\175", }; +"\72\173", "\73", "\175", "\173", }; protected Token jjFillToken() { final Token t; @@ -595,10 +600,10 @@ public void SwitchTo(int lexState) /** Lex State array. 
*/ public static final int[] jjnewLexState = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, + -1, -1, }; static final long[] jjtoToken = { - 0x3fffffdL, + 0x7fffffdL, }; static final long[] jjtoSkip = { 0x2L, diff --git a/test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java b/test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java index 3c2b785f34..a43ab168f1 100644 --- a/test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java +++ b/test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java @@ -7,10 +7,12 @@ import java.util.Map; import java.util.Set; +import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.stats.IntCounter; import edu.stanford.nlp.trees.UniversalEnglishGrammaticalRelations; import edu.stanford.nlp.trees.Tree; +import edu.stanford.nlp.trees.ud.CoNLLUFeatures; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.semgraph.SemanticGraphFactory; @@ -222,6 +224,76 @@ public void testNegatedRegex() { "ate", "blueberry"); } + public void testBrokenContainsExpression() { + try { + // word is a String, not a Map, so this should throw a parse exception + SemgrexPattern pattern = SemgrexPattern.compile("{word:{foo:bar}}"); + throw new AssertionError("Expected a SemgrexParseException"); + } catch (SemgrexParseException e) { + // good + } + } + + public void testContainsExpression() { + // morphofeatures is a Map, so this should work + SemgrexPattern pattern = SemgrexPattern.compile("{morphofeatures:{foo:bar}}"); + SemanticGraph graph = makeComplicatedGraph(); + Set vertices = graph.vertexSet(); + for (IndexedWord iw : vertices) { + if (iw.value().equals("D") || iw.value().equals("F")) { + CoNLLUFeatures feats = new CoNLLUFeatures(); + feats.put("foo", "bar"); + iw.set(CoreAnnotations.CoNLLUFeats.class, feats); + } + } + runTest(pattern, graph, "D", "F"); + } +
+ public void testContainsRegexExpression() { + // morphofeatures is a Map, so this should work + SemanticGraph graph = makeComplicatedGraph(); + Set vertices = graph.vertexSet(); + for (IndexedWord iw : vertices) { + if (iw.value().equals("B") || iw.value().equals("D") || iw.value().equals("F")) { + CoNLLUFeatures feats = new CoNLLUFeatures(); + feats.put("foo", "bar" + iw.value()); + iw.set(CoreAnnotations.CoNLLUFeats.class, feats); + } + } + + // test a positive regex + SemgrexPattern pattern = SemgrexPattern.compile("{morphofeatures:{foo:/bar[BD]/}}"); + runTest(pattern, graph, "B", "D"); + + // test a negative regex + // should match both the ones that don't have features + // and the ones that have a non-matching feature + pattern = SemgrexPattern.compile("{morphofeatures:{foo!:/bar[BD]/}}"); + runTest(pattern, graph, "A", "C", "E", "F", "G", "H", "I", "J"); + } + + public void testDoubleContainsExpression() { + // morphofeatures is a Map, so this should work + SemanticGraph graph = makeComplicatedGraph(); + Set vertices = graph.vertexSet(); + for (IndexedWord iw : vertices) { + if (iw.value().equals("B") || iw.value().equals("D") || iw.value().equals("F")) { + CoNLLUFeatures feats = new CoNLLUFeatures(); + feats.put("foo", "bar"); + feats.put("name", iw.value()); + iw.set(CoreAnnotations.CoNLLUFeats.class, feats); + } + } + + // test a positive regex + SemgrexPattern pattern = SemgrexPattern.compile("{morphofeatures:{foo:/bar/;name:/[BD]/}}"); + runTest(pattern, graph, "B", "D"); + + // test one positive, one negative regex + pattern = SemgrexPattern.compile("{morphofeatures:{foo:/bar/;name!:/[BD]/}}"); + runTest(pattern, graph, "F"); + } + public void testReferencedRegex() { runTest("{word:/Bill/}", "[ate subj>Bill obj>[bill det>the]]", "Bill");