Skip to content

Commit c1ebde4

Browse files
committed
Python: improve location computation
1 parent aa64390 commit c1ebde4

File tree

3 files changed

+59
-4
lines changed

3 files changed

+59
-4
lines changed

python/ql/lib/semmle/python/AstExtended.qll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,24 @@ class StringPart extends StringPart_, AstNode {
154154
override string toString() { result = StringPart_.super.toString() }
155155

156156
override Location getLocation() { result = StringPart_.super.getLocation() }
157+
158+
/** Holds if the content of this `StringPart` is preceded by `prefix` (everything up to and including the opening quote) and delimited by the quote sequence `quote`. */
159+
predicate context(string prefix, string quote) {
160+
exists(int occurrenceOffset |
161+
quote = this.getText().regexpFind("\"{3}|\"{1}|'{3}|'{1}", 0, occurrenceOffset) and
162+
prefix = this.getText().prefix(occurrenceOffset + quote.length())
163+
)
164+
}
165+
166+
/**
167+
* Gets the length of the content, that is the text between the prefix and the quote.
168+
* See `context` for obtaining the prefix and the quote.
169+
*/
170+
int getContentLenght() {
171+
exists(string prefix, string quote | this.context(prefix, quote) |
172+
result = this.getText().length() - prefix.length() - quote.length()
173+
)
174+
}
157175
}
158176

159177
class StringPartList extends StringPartList_ { }

python/ql/lib/semmle/python/regexp/RegexTreeView.qll

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,53 @@ module Impl implements RegexTreeViewSig {
223223
*/
224224
Location getLocation() { result = re.getLocation() }
225225

226+
/** Gets the accumulated content length of the string parts with an index lower than `index`, if any. */
227+
private int getPartOffset(int index) {
228+
index = 0 and result = 0
229+
or
230+
index > 0 and
231+
exists(int previousOffset | previousOffset = this.getPartOffset(index - 1) |
232+
result =
233+
previousOffset + re.(StrConst).getImplicitlyConcatenatedPart(index - 1).getContentLenght()
234+
)
235+
}
236+
237+
/**
238+
* Gets the `StringPart` in which this `RegExpTerm` resides, if any.
239+
* `localOffset` will be the offset of this `RegExpTerm` inside `result`.
240+
*/
241+
StringPart getPart(int localOffset) {
242+
exists(int index, int prefixLength | index = max(int i | this.getPartOffset(i) < start) |
243+
result = re.(StrConst).getImplicitlyConcatenatedPart(index) and
244+
exists(string prefix | result.context(prefix, _) | prefixLength = prefix.length()) and
245+
// Example:
246+
// re.compile('...' r"""...this..""")
247+
// - `start` is the offset from `(` to `this` as counted after concatenating all parts.
248+
// - we subtract the length of the previous `StringPart`s, `'...'`, to know how far into this `StringPart` we go.
249+
// - as the prefix 'r"""' is part of the `StringPart`, `this` is found that much further in.
250+
localOffset = start - this.getPartOffset(index) + prefixLength
251+
)
252+
}
253+
226254
/** Holds if this term is found at the specified location offsets. */
227255
predicate hasLocationInfo(
228256
string filepath, int startline, int startcolumn, int endline, int endcolumn
229257
) {
258+
not exists(this.getPart(_)) and
230259
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
231260
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
232261
startcolumn = re_start + start + prefix_len and
233262
endcolumn = re_start + end + prefix_len - 1
234263
/* inclusive vs exclusive */
235264
)
265+
or
266+
exists(StringPart part, int localOffset | part = this.getPart(localOffset) |
267+
filepath = part.getLocation().getFile().getAbsolutePath() and
268+
startline = part.getLocation().getStartLine() and
269+
startcolumn = part.getLocation().getStartColumn() + localOffset and
270+
endline = startline and
271+
endcolumn = (end - start) + startcolumn
272+
)
236273
}
237274

238275
/** Gets the file in which this term is found. */

python/ql/test/library-tests/regexparser/locations.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,25 +50,25 @@
5050
)
5151

5252
# plain string with multiple parts
53-
re.compile( # $ location=1:2 SPURIOUS:location=1:23 MISSING:location=1:26
53+
re.compile( # $ location=1:2 location=1:26
5454
'[this] is a test' ' and [this] is another test'
5555
)
5656

5757
# plain string with multiple parts across lines
58-
re.compile( # $ location=1:2 SPURIOUS:location=1:23 MISSING:location=2:7
58+
re.compile( # $ location=1:2 location=2:7
5959
'[this] is a test'
6060
' and [this] is another test'
6161
)
6262

6363
# plain string with multiple parts across lines and comments
64-
re.compile( # $ location=1:2 SPURIOUS:location=1:23 MISSING:location=3:7
64+
re.compile( # $ location=1:2 location=3:7
6565
'[this] is a test'
6666
# comment
6767
' and [this] is another test'
6868
)
6969

7070
# multiple parts of different kinds
71-
re.compile( # $ location=1:2 SPURIOUS:location=1:23 location=1:50 location=1:81 MISSING:location=1:28 location=2:11 location=3:8
71+
re.compile( # $ location=1:2 location=1:28 location=2:11 location=3:8
7272
'[this] is a test' ''' and [this] is another test'''
7373
br""" and [this] is yet another test"""
7474
r' and [this] is one more'

0 commit comments

Comments
 (0)