@@ -124,35 +124,47 @@ abstract class RegexString extends Expr {
124
124
)
125
125
}
126
126
127
/**
 * Escaped characters without any special handling (yet).
 *
 * Holds if the character at index `i` exists and is none of `x`, `u`, `U`
 * or `N`. Those four letters are the ones `escapedCharacter` treats as the
 * start of a longer escape (`\xhh`, `\uhhhh`, `\Uhhhhhhhh`, `\N{...}`).
 */
private predicate singleEscape(int i) {
  exists(string c |
    c = this.getChar(i) and
    // Every excluded value is a one-character string. A padded literal such
    // as " U" would presumably never equal a single character, which would
    // silently let `U` through as an ordinary single escape.
    c != "x" and
    c != "u" and
    c != "U" and
    c != "N"
  )
}
134
+
135
/**
 * Named unicode characters, eg \N{degree sign}.
 *
 * Holds if the characters at `start + 1` and `start + 2` are `N` and `{`,
 * the character at `end - 1` is `}`, `end` is greater than `start`, and no
 * other `}` lies strictly between the `{` and that closing `}`.
 *
 * NOTE(review): the character at `start` itself is not inspected here, and
 * only `end > start` is enforced; callers presumably restrict `start` to an
 * escaping character - confirm.
 */
private predicate escapedName(int start, int end) {
  start < end and
  this.getChar(start + 1) = "N" and
  this.getChar(start + 2) = "{" and
  this.getChar(end - 1) = "}" and
  // the name body spans the indices after the `{` and before the final `}`;
  // it must not contain a closing brace of its own
  not exists(int inner | inner in [start + 3 .. end - 2] | this.getChar(inner) = "}")
}
134
147
135
148
/**
 * Holds if an escape sequence starts at the escaping character at index
 * `start` and `end` is the index just past the last character of that
 * sequence (so `\xhh` gives `end = start + 4` and a plain single-character
 * escape gives `end = start + 2`).
 */
private predicate escapedCharacter(int start, int end) {
  this.escapingChar(start) and
  // Reject this `end` if taking one more character would still parse as an
  // integer; presumably this makes a run of digits extend as far as it can.
  not exists(this.getText().substring(start + 1, end + 1).toInt()) and
  (
    // hex value \xhh
    this.getChar(start + 1) = "x" and end = start + 4
    or
    // octal value \ooo: one to three characters after the escaping
    // character, accepted whenever that text parses via `toInt()`
    end in [start + 2 .. start + 4] and
    exists(this.getText().substring(start + 1, end).toInt())
    or
    // 16-bit hex value \uhhhh
    this.getChar(start + 1) = "u" and end = start + 6
    or
    // 32-bit hex value \Uhhhhhhhh
    this.getChar(start + 1) = "U" and end = start + 10
    or
    // named unicode character \N{name}
    this.escapedName(start, end)
    or
    // single character not handled above, update when adding a new case
    this.singleEscape(start + 1) and end = start + 2
  )
}
0 commit comments