Skip to content

Commit 81a5692

Browse files
committed
Python: handle \uxxxx and refactor
1 parent c7ddd2c commit 81a5692

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

python/ql/src/semmle/python/regex.qll

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,35 +124,47 @@ abstract class RegexString extends Expr {
124124
)
125125
}
126126

127-
// escaped characters without any special handling (yet)
127+
/** Escaped characters without any special handling (yet) */
128128
private predicate singleEscape(int i) {
129129
exists(string c |
130130
c = this.getChar(i) and
131-
c != "x" and c != "U" and c!= "N"
131+
c != "x" and c != "u" and c != "U" and c != "N"
132132
)
133-
}
133+
}
134+
135+
/** Named Unicode characters, e.g. \N{degree sign} */
136+
private predicate escapedName(int start, int end) {
137+
this.getChar(start + 1) = "N" and
138+
this.getChar(start + 2) = "{" and
139+
this.getChar(end - 1) = "}" and
140+
end > start and
141+
not exists(int i |
142+
i > start + 2 and
143+
i < end - 1 and
144+
this.getChar(i) = "}"
145+
)
146+
}
134147

135148
private predicate escapedCharacter(int start, int end) {
136149
this.escapingChar(start) and
137150
not exists(this.getText().substring(start + 1, end + 1).toInt()) and
138151
(
152+
// hex value \xhh
139153
this.getChar(start + 1) = "x" and end = start + 4
140154
or
155+
// octal value \ooo
141156
end in [start + 2 .. start + 4] and
142157
exists(this.getText().substring(start + 1, end).toInt())
143158
or
159+
// 16-bit hex value \uhhhh
160+
this.getChar(start + 1) = "u" and end = start + 6
161+
or
162+
// 32-bit hex value \Uhhhhhhhh
144163
this.getChar(start + 1) = "U" and end = start + 10
145164
or
146-
this.getChar(start + 1) = "N" and
147-
this.getChar(start + 2) = "{" and
148-
this.getChar(end - 1) = "}" and
149-
end > start and
150-
not exists(int i |
151-
i > start + 2 and
152-
i < end - 1 and
153-
this.getChar(i) = "}"
154-
)
165+
escapedName(start, end)
155166
or
167+
// any other single escaped character not handled above; update `singleEscape` when adding a new case
156168
this.singleEscape(start + 1) and end = start + 2
157169
)
158170
}

python/ql/test/query-tests/Expressions/Regex/test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@
142142

143143
#Allow unicode in raw strings
144144
re.compile(r"[\U00010000-\U0010FFFF]")
145+
re.compile(r"[\u0000-\uFFFF]")
145146

146147
#Allow unicode names
147148
re.compile(r"[\N{degree sign}\N{EM DASH}]")

0 commit comments

Comments
 (0)