@@ -32,6 +32,11 @@ public class NodePattern extends SemgrexPattern {
3232 * Otherwise, the type will be a Pattern, and you must use Pattern.matches().
3333 */
3434 private final List <Attribute > attributes ;
35+ /**
36+ * Attributes which represent Maps (eg CoNLLUFeats)
37+ * and only partial matches are necessary
38+ */
39+ private final List <Pair <String , Attribute >> partialAttributes ;
3540 private final boolean isRoot ;
3641 private final boolean isLink ;
3742 private final boolean isEmpty ;
@@ -58,6 +63,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
5863 // order the attributes so that the pattern stays the same when
5964 // printing a compiled pattern
6065 this .attributes = new ArrayList <>();
66+ // same with partial attributes
67+ this .partialAttributes = new ArrayList <>();
68+
6169 descString = "{" ;
6270 for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
6371 if (!descString .equals ("{" ))
@@ -70,23 +78,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
7078 if (value .equals ("__" )) {
7179 attributes .add (new Attribute (key , true , true , negated ));
7280 } else if (value .matches ("/.*/" )) {
73- boolean isRegexp = false ;
74- for (int i = 1 ; i < value .length () - 1 ; ++i ) {
75- char chr = value .charAt (i );
76- if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
77- isRegexp = true ;
78- break ;
79- }
80- }
81- String patternContent = value .substring (1 , value .length () - 1 );
82- if (isRegexp ) {
83- attributes .add (new Attribute (key ,
84- Pattern .compile (patternContent ),
85- Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
86- negated ));
87- } else {
88- attributes .add (new Attribute (key , patternContent , patternContent , negated ));
89- }
81+ attributes .add (buildRegexAttribute (key , value , negated ));
9082 } else { // raw description
9183 attributes .add (new Attribute (key , value , value , negated ));
9284 }
@@ -98,6 +90,27 @@ public NodePattern(GraphRelation r, boolean negDesc,
9890 }
9991 }
10092
93+ for (Triple <String , String , String > entry : attrs .contains ()) {
94+ String annotation = entry .first ();
95+ String key = entry .second ();
96+ String value = entry .third ();
97+
98+ final Attribute attr ;
99+ // Add the attributes for this key
100+ if (value .equals ("__" )) {
101+ attr = new Attribute (key , true , true , false );
102+ } else if (value .matches ("/.*/" )) {
103+ attr = buildRegexAttribute (key , value , false );
104+ } else { // raw description
105+ attr = new Attribute (key , value , value , false );
106+ }
107+ partialAttributes .add (new Pair <>(annotation , attr ));
108+
109+ if (!descString .equals ("{" ))
110+ descString += ";" ;
111+ descString += (annotation + "@" + key + "=" + value );
112+ }
113+
101114 if (attrs .root ()) {
102115 if (!descString .equals ("{" ))
103116 descString += ";" ;
@@ -118,6 +131,53 @@ public NodePattern(GraphRelation r, boolean negDesc,
118131 this .variableGroups = Collections .unmodifiableList (variableGroups );
119132 }
120133
134+ /**
135+ * Tests the value to see if it's really a regex, or just a string wrapped in regex.
136+ * Return an Attribute which matches this expression
137+ */
138+ private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
139+ boolean isRegexp = false ;
140+ for (int i = 1 ; i < value .length () - 1 ; ++i ) {
141+ char chr = value .charAt (i );
142+ if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
143+ isRegexp = true ;
144+ break ;
145+ }
146+ }
147+ String patternContent = value .substring (1 , value .length () - 1 );
148+ if (isRegexp ) {
149+ return new Attribute (key ,
150+ Pattern .compile (patternContent ),
151+ Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
152+ negated );
153+ } else {
154+ return new Attribute (key , patternContent , patternContent , negated );
155+ }
156+ }
157+
158+ private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ) {
159+ boolean matches ;
160+
161+ Object toMatch = ignoreCase ? attr .caseless : attr .cased ;
162+ if (toMatch instanceof Boolean ) {
163+ matches = ((Boolean ) toMatch );
164+ } else if (toMatch instanceof String ) {
165+ if (ignoreCase ) {
166+ matches = nodeValue .equalsIgnoreCase (toMatch .toString ());
167+ } else {
168+ matches = nodeValue .equals (toMatch .toString ());
169+ }
170+ } else if (toMatch instanceof Pattern ) {
171+ matches = ((Pattern ) toMatch ).matcher (nodeValue ).matches ();
172+ } else {
173+ throw new IllegalStateException ("Unknown matcher type: " + toMatch + " (of class + " + toMatch .getClass () + ")" );
174+ }
175+ if (attr .negated ) {
176+ matches = !matches ;
177+ }
178+ return matches ;
179+ }
180+
121181 @ SuppressWarnings ("unchecked" )
122182 public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ) {
123183 // System.out.println(node.word());
@@ -156,31 +216,20 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
156216 return negDesc ;
157217
158218 // Get the node pattern
159- Object toMatch = ignoreCase ? attr .caseless : attr .cased ;
160- boolean matches ;
161- if (toMatch instanceof Boolean ) {
162- matches = ((Boolean ) toMatch );
163- } else if (toMatch instanceof String ) {
164- if (ignoreCase ) {
165- matches = nodeValue .equalsIgnoreCase (toMatch .toString ());
166- } else {
167- matches = nodeValue .equals (toMatch .toString ());
168- }
169- } else if (toMatch instanceof Pattern ) {
170- matches = ((Pattern ) toMatch ).matcher (nodeValue ).matches ();
171- } else {
172- throw new IllegalStateException ("Unknown matcher type: " + toMatch + " (of class + " + toMatch .getClass () + ")" );
173- }
174- if (attr .negated ) {
175- matches = !matches ;
176- }
219+ boolean matches = checkMatch (attr , ignoreCase , nodeValue );
177220
178221 if (!matches ) {
179222 // System.out.println("doesn't match");
180223 // System.out.println("");
181224 return negDesc ;
182225 }
183226 }
227+ for (Pair <String , Attribute > partialAttribute : partialAttributes ) {
228+ String annotation = partialAttribute .first ();
229+ Attribute attr = partialAttribute .second ();
230+ // TODO
231+ }
232+
184233 // System.out.println("matches");
185234 // System.out.println("");
186235 return !negDesc ;
0 commit comments