88import java .util .regex .Matcher ;
99import java .util .regex .Pattern ;
1010
11+ import edu .stanford .nlp .ling .AnnotationLookup ;
1112import edu .stanford .nlp .ling .IndexedWord ;
1213import edu .stanford .nlp .semgraph .SemanticGraph ;
1314import edu .stanford .nlp .semgraph .SemanticGraphEdge ;
@@ -32,6 +33,11 @@ public class NodePattern extends SemgrexPattern {
3233 * Otherwise, the type will be a Pattern, and you must use Pattern.matches().
3334 */
3435 private final List <Attribute > attributes ;
36+ /**
37+ * Attributes which represent Maps (e.g. CoNLLUFeats),
38+ * for which only a partial match is necessary
39+ */
40+ private final List <Pair <String , Attribute >> partialAttributes ;
3541 private final boolean isRoot ;
3642 private final boolean isLink ;
3743 private final boolean isEmpty ;
@@ -58,6 +64,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
5864 // order the attributes so that the pattern stays the same when
5965 // printing a compiled pattern
6066 this .attributes = new ArrayList <>();
67+ // same with partial attributes
68+ this .partialAttributes = new ArrayList <>();
69+
6170 descString = "{" ;
6271 for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
6372 if (!descString .equals ("{" ))
@@ -70,23 +79,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
7079 if (value .equals ("__" )) {
7180 attributes .add (new Attribute (key , true , true , negated ));
7281 } else if (value .matches ("/.*/" )) {
73- boolean isRegexp = false ;
74- for (int i = 1 ; i < value .length () - 1 ; ++i ) {
75- char chr = value .charAt (i );
76- if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
77- isRegexp = true ;
78- break ;
79- }
80- }
81- String patternContent = value .substring (1 , value .length () - 1 );
82- if (isRegexp ) {
83- attributes .add (new Attribute (key ,
84- Pattern .compile (patternContent ),
85- Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
86- negated ));
87- } else {
88- attributes .add (new Attribute (key , patternContent , patternContent , negated ));
89- }
82+ attributes .add (buildRegexAttribute (key , value , negated ));
9083 } else { // raw description
9184 attributes .add (new Attribute (key , value , value , negated ));
9285 }
@@ -98,6 +91,33 @@ public NodePattern(GraphRelation r, boolean negDesc,
9891 }
9992 }
10093
94+ for (Triple <String , String , String > entry : attrs .contains ()) {
95+ String annotation = entry .first ();
96+ String key = entry .second ();
97+ String value = entry .third ();
98+
99+ Class <?> clazz = AnnotationLookup .getValueType (AnnotationLookup .toCoreKey (annotation ));
100+ boolean isMap = clazz != null && Map .class .isAssignableFrom (clazz );
101+ if (!isMap ) {
102+ throw new SemgrexParseException ("Cannot process a single key/value from annotation " + annotation + " as it is not a Map" );
103+ }
104+
105+ final Attribute attr ;
106+ // Add the attributes for this key
107+ if (value .equals ("__" )) {
108+ attr = new Attribute (key , true , true , false );
109+ } else if (value .matches ("/.*/" )) {
110+ attr = buildRegexAttribute (key , value , false );
111+ } else { // raw description
112+ attr = new Attribute (key , value , value , false );
113+ }
114+ partialAttributes .add (new Pair <>(annotation , attr ));
115+
116+ if (!descString .equals ("{" ))
117+ descString += ";" ;
118+ descString += (annotation + "@" + key + "=" + value );
119+ }
120+
101121 if (attrs .root ()) {
102122 if (!descString .equals ("{" ))
103123 descString += ";" ;
@@ -118,6 +138,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
118138 this .variableGroups = Collections .unmodifiableList (variableGroups );
119139 }
120140
141+ /**
142+ * Tests the value to see if it's really a regex, or just a string wrapped in regex.
143+ * Return an Attribute which matches this expression
144+ */
145+ private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
146+ boolean isRegexp = false ;
147+ for (int i = 1 ; i < value .length () - 1 ; ++i ) {
148+ char chr = value .charAt (i );
149+ if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
150+ isRegexp = true ;
151+ break ;
152+ }
153+ }
154+ String patternContent = value .substring (1 , value .length () - 1 );
155+ if (isRegexp ) {
156+ return new Attribute (key ,
157+ Pattern .compile (patternContent ),
158+ Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
159+ negated );
160+ } else {
161+ return new Attribute (key , patternContent , patternContent , negated );
162+ }
163+ }
164+
121165 private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ) {
122166 if (nodeValue == null ) {
123167 // treat non-existent attributes as having matched a negated expression
@@ -189,6 +233,29 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
189233 return negDesc ;
190234 }
191235 }
236+ for (Pair <String , Attribute > partialAttribute : partialAttributes ) {
237+ String annotation = partialAttribute .first ();
238+ Attribute attr = partialAttribute .second ();
239+
240+ Class clazz = Env .lookupAnnotationKey (env , annotation );
241+ Object rawmap = node .get (clazz );
242+ // if the map is null, it can't possibly match...
243+ if (rawmap == null ) {
244+ return negDesc ;
245+ }
246+ if (!(rawmap instanceof Map ))
247+ throw new RuntimeException ("Can only use partial attributes with Maps... this should have been checked at creation time!" );
248+ Map <String , ?> map = (Map ) rawmap ;
249+
250+ // TODO: allow for regex match on the keys?
251+ Object value = map .get (attr .key );
252+ final String nodeValue = (value == null ) ? null : value .toString ();
253+ boolean matches = checkMatch (attr , ignoreCase , nodeValue );
254+ if (!matches ) {
255+ return negDesc ;
256+ }
257+ }
258+
192259 // System.out.println("matches");
193260 // System.out.println("");
194261 return !negDesc ;
0 commit comments