55import java .util .Iterator ;
66import java .util .List ;
77import java .util .Map ;
8+ import java .util .TreeMap ;
89import java .util .regex .Matcher ;
910import java .util .regex .Pattern ;
1011
@@ -32,6 +33,11 @@ public class NodePattern extends SemgrexPattern {
3233 * Otherwise, the type will be a Pattern, and you must use Pattern.matches().
3334 */
3435 private final List <Attribute > attributes ;
36+ /**
37+ * Attributes which represent Maps (e.g., CoNLLUFeats)
38+ * for which only a partial match is necessary
39+ */
40+ private final Map <String , Attribute > partialAttributes ;
3541 private final boolean isRoot ;
3642 private final boolean isLink ;
3743 private final boolean isEmpty ;
@@ -58,6 +64,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
5864 // order the attributes so that the pattern stays the same when
5965 // printing a compiled pattern
6066 this .attributes = new ArrayList <>();
67+ // same with partial attributes - use a TreeMap to keep things in order
68+ this .partialAttributes = new TreeMap <>();
69+
6170 descString = "{" ;
6271 for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
6372 if (!descString .equals ("{" ))
@@ -70,23 +79,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
7079 if (value .equals ("__" )) {
7180 attributes .add (new Attribute (key , true , true , negated ));
7281 } else if (value .matches ("/.*/" )) {
73- boolean isRegexp = false ;
74- for (int i = 1 ; i < value .length () - 1 ; ++i ) {
75- char chr = value .charAt (i );
76- if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
77- isRegexp = true ;
78- break ;
79- }
80- }
81- String patternContent = value .substring (1 , value .length () - 1 );
82- if (isRegexp ) {
83- attributes .add (new Attribute (key ,
84- Pattern .compile (patternContent ),
85- Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
86- negated ));
87- } else {
88- attributes .add (new Attribute (key , patternContent , patternContent , negated ));
89- }
82+ attributes .add (buildRegexAttribute (key , value , negated ));
9083 } else { // raw description
9184 attributes .add (new Attribute (key , value , value , negated ));
9285 }
@@ -98,6 +91,25 @@ public NodePattern(GraphRelation r, boolean negDesc,
9891 }
9992 }
10093
94+ for (Triple <String , String , String > entry : attrs .contains ()) {
95+ String annotation = entry .first ();
96+ String key = entry .second ();
97+ String value = entry .third ();
98+
99+ // Add the attributes for this key
100+ if (value .equals ("__" )) {
101+ partialAttributes .put (annotation , new Attribute (key , true , true , false ));
102+ } else if (value .matches ("/.*/" )) {
103+ partialAttributes .put (annotation , buildRegexAttribute (key , value , false ));
104+ } else { // raw description
105+ partialAttributes .put (annotation , new Attribute (key , value , value , false ));
106+ }
107+
108+ if (!descString .equals ("{" ))
109+ descString += ";" ;
110+ descString += (annotation + "@" + key + "=" + value );
111+ }
112+
101113 if (attrs .root ()) {
102114 if (!descString .equals ("{" ))
103115 descString += ";" ;
@@ -118,6 +130,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
118130 this .variableGroups = Collections .unmodifiableList (variableGroups );
119131 }
120132
133+ /**
134+ * Tests the value to see if it's really a regex, or just a string wrapped in regex.
135+ * Return an Attribute which matches this expression
136+ */
137+ private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
138+ boolean isRegexp = false ;
139+ for (int i = 1 ; i < value .length () - 1 ; ++i ) {
140+ char chr = value .charAt (i );
141+ if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
142+ isRegexp = true ;
143+ break ;
144+ }
145+ }
146+ String patternContent = value .substring (1 , value .length () - 1 );
147+ if (isRegexp ) {
148+ return new Attribute (key ,
149+ Pattern .compile (patternContent ),
150+ Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
151+ negated );
152+ } else {
153+ return new Attribute (key , patternContent , patternContent , negated );
154+ }
155+ }
156+
121157 @ SuppressWarnings ("unchecked" )
122158 public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ) {
123159 // System.out.println(node.word());
0 commit comments