Skip to content

Commit 6f67055

Browse files
author
Max Schaefer
committed
Correctly account for length of string literal prefix when computing locations for RegExpTerms.
1 parent d4ff9c8 commit 6f67055

File tree

5 files changed

+44
-28
lines changed

5 files changed

+44
-28
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: fix
3+
---
4+
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.

python/ql/lib/semmle/python/regexp/RegexTreeView.qll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,11 @@ module Impl implements RegexTreeViewSig {
227227
predicate hasLocationInfo(
228228
string filepath, int startline, int startcolumn, int endline, int endcolumn
229229
) {
230-
exists(int re_start |
230+
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
231231
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
232-
startcolumn = re_start + start + 4 and
233-
endcolumn = re_start + end + 3
232+
startcolumn = re_start + start + prefix_len and
233+
endcolumn = re_start + end + prefix_len - 1
234+
/* inclusive vs exclusive */
234235
)
235236
}
236237

python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ private module FindRegexMode {
101101
}
102102

103103
/**
104-
* DEPRECATED: Use `Regex` instead.
104+
* DEPRECATED: Use `RegExp` instead.
105105
*/
106106
deprecated class Regex = RegExp;
107107

@@ -327,6 +327,17 @@ class RegExp extends Expr instanceof StrConst {
327327
/** Gets the text of this regex */
328328
string getText() { result = super.getText() }
329329

330+
/**
331+
* Gets the prefix of this regex
332+
*
333+
* Examples:
334+
*
335+
* - The prefix of `'x*y'` is `'`.
336+
* - The prefix of `r''` is `r'`.
337+
* - The prefix of `r"""x*y"""` is `r"""`.
338+
*/
339+
string getPrefix() { result = super.getPrefix() }
340+
330341
/** Gets the `i`th character of this regex */
331342
string getChar(int i) { result = this.getText().charAt(i) }
332343

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1-
| locations.py | 14 | 5 |
2-
| locations.py | 19 | 5 |
3-
| locations.py | 24 | 5 |
4-
| locations.py | 29 | 5 |
5-
| locations.py | 34 | 5 |
1+
| locations.py | 14 | 2 |
2+
| locations.py | 19 | 3 |
3+
| locations.py | 24 | 3 |
4+
| locations.py | 29 | 4 |
5+
| locations.py | 34 | 4 |
66
| locations.py | 39 | 5 |
77
| locations.py | 44 | 5 |
8-
| locations.py | 49 | 5 |
9-
| locations.py | 54 | 5 |
10-
| locations.py | 54 | 26 |
11-
| locations.py | 59 | 5 |
12-
| locations.py | 59 | 26 |
13-
| locations.py | 65 | 5 |
14-
| locations.py | 65 | 26 |
8+
| locations.py | 49 | 6 |
9+
| locations.py | 54 | 2 |
10+
| locations.py | 54 | 23 |
11+
| locations.py | 59 | 2 |
12+
| locations.py | 59 | 23 |
13+
| locations.py | 65 | 2 |
14+
| locations.py | 65 | 23 |
1515
| locations.py | 72 | 6 |
1616
| locations.py | 72 | 27 |
17-
| locations.py | 80 | 6 |
18-
| locations.py | 85 | 7 |
19-
| locations.py | 90 | 5 |
20-
| locations.py | 90 | 26 |
17+
| locations.py | 80 | 3 |
18+
| locations.py | 85 | 5 |
19+
| locations.py | 90 | 2 |
20+
| locations.py | 90 | 23 |

python/ql/test/library-tests/regexparser/locations.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
# regexp term `[this]`, appearing in various kinds of regexps.
77
#
88
# To make the location information easier to understand, we generally put each
9-
# regexp on its own line, even though this is not the way one would normally
10-
# write regexps in Python.
9+
# regexp on its own line, even though this is not idiomatic Python.
10+
# Comments indicate cases we currently do not handle correctly.
1111

1212
# plain string
1313
re.compile(
@@ -49,33 +49,33 @@
4949
br'''[this] is a test'''
5050
)
5151

52-
# plain string with multiple parts
52+
# plain string with multiple parts (second [this] gets wrong column: 23 instead of 26)
5353
re.compile(
5454
'[this] is a test' ' and [this] is another test'
5555
)
5656

57-
# plain string with multiple parts across lines
57+
# plain string with multiple parts across lines (second [this] gets wrong location: 59:23 instead of 60:7)
5858
re.compile(
5959
'[this] is a test'
6060
' and [this] is another test'
6161
)
6262

63-
# plain string with multiple parts across lines and comments
63+
# plain string with multiple parts across lines and comments (second [this] gets wrong location: 65:23 instead of 67:7)
6464
re.compile(
6565
'[this] is a test'
6666
# comment
6767
' and [this] is another test'
6868
)
6969

70-
# actual multiline string
70+
# actual multiline string (both [this]s get wrong location: 72:6 and 72:27 instead of 73:1 and 74:5)
7171
re.compile(
7272
r'''
7373
[this] is a test
7474
and [this] is another test
7575
'''
7676
)
7777

78-
# plain string with escape sequences
78+
# plain string with escape sequences ([this] gets wrong location: 80:3 instead of 80:4)
7979
re.compile(
8080
'\t[this] is a test'
8181
)
@@ -85,7 +85,7 @@
8585
r'\A[this] is a test'
8686
)
8787

88-
# plain string with escaped newline
88+
# plain string with escaped newline (second [this] gets wrong location: 90:23 instead of 91:6)
8989
re.compile(
9090
'[this] is a test\
9191
and [this] is another test'

0 commit comments

Comments
 (0)