Skip to content

Commit fa1e8ee

Browse files
committed
add getACodepoint to the shared Strings library, and use it in NfaUtils
1 parent 822ba2a commit fa1e8ee

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

shared/regex/codeql/regex/nfa/NfaUtils.qll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,17 +164,17 @@ module Make<RegexTreeViewSig TreeImpl> {
164164
/** An input symbol corresponding to character `c`. */
165165
Char(string c) {
166166
c =
167-
getCodepointAt(any(RegexpCharacterConstant cc |
167+
getACodepoint(any(RegexpCharacterConstant cc |
168168
cc instanceof RelevantRegExpTerm and
169169
not isIgnoreCase(cc.getRootTerm())
170-
).getValue(), _)
170+
).getValue())
171171
or
172172
// normalize everything to lower case if the regexp is case insensitive
173173
c =
174174
any(RegexpCharacterConstant cc, string char |
175175
cc instanceof RelevantRegExpTerm and
176176
isIgnoreCase(cc.getRootTerm()) and
177-
char = getCodepointAt(cc.getValue(), _)
177+
char = getACodepoint(cc.getValue())
178178
|
179179
char.toLowerCase()
180180
)
@@ -370,7 +370,7 @@ module Make<RegexTreeViewSig TreeImpl> {
370370
string getARelevantChar() {
371371
exists(asciiPrintable(result))
372372
or
373-
exists(RegexpCharacterConstant c | result = getCodepointAt(c.getValue(), _))
373+
exists(RegexpCharacterConstant c | result = getACodepoint(c.getValue()))
374374
or
375375
classEscapeMatches(_, result)
376376
}
@@ -1258,7 +1258,7 @@ module Make<RegexTreeViewSig TreeImpl> {
12581258
* Gets a `char` that occurs in a `pump` string.
12591259
*/
12601260
private string getAProcessChar() {
1261-
result = getCodepointAt(any(string s | isReDoSCandidate(_, s)), _)
1261+
result = getACodepoint(any(string s | isReDoSCandidate(_, s)))
12621262
}
12631263
}
12641264

shared/util/codeql/util/Strings.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ string getCodepointAt(string s, int i) {
6363
)
6464
}
6565

66+
/**
67+
* Gets any Unicode character (code point) that appears in `s`.
68+
*/
69+
bindingset[s]
70+
string getACodepoint(string s) { result = s.codePointAt(_).toUnicode() }
71+
6672
/**
6773
* Gets the number of Unicode codepoints in `s`, not counting unpaired surrogates.
6874
*/

0 commit comments

Comments
 (0)