Skip to content

Commit 2b0415e

Browse files
authored
Merge pull request github#6741 from yoff/python/model-os-path-file-accesses
Approved by RasmusWL
2 parents 878203f + 6c108e4 commit 2b0415e

File tree

2 files changed

+100
-28
lines changed

2 files changed

+100
-28
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 93 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -196,25 +196,110 @@ private module StdlibPrivate {
196196
}
197197

198198
/**
199-
* A call to `os.path.normpath`.
200-
* See https://docs.python.org/3/library/os.path.html#os.path.normpath
199+
* The `os.path` module offers a number of methods for checking if a file exists and/or has certain
200+
* properties, leading to a file system access.
201+
* A call to `os.path.exists` or `os.path.lexists` will check if a file exists on the file system.
202+
* (Although, on some platforms, the check may return `false` due to missing permissions.)
203+
* A call to `os.path.getatime` will raise `OSError` if the file does not exist or is inaccessible.
204+
* See:
205+
* - https://docs.python.org/3/library/os.path.html#os.path.exists
206+
* - https://docs.python.org/3/library/os.path.html#os.path.lexists
207+
* - https://docs.python.org/3/library/os.path.html#os.path.isfile
208+
* - https://docs.python.org/3/library/os.path.html#os.path.isdir
209+
* - https://docs.python.org/3/library/os.path.html#os.path.islink
210+
* - https://docs.python.org/3/library/os.path.html#os.path.ismount
211+
* - https://docs.python.org/3/library/os.path.html#os.path.getatime
212+
* - https://docs.python.org/3/library/os.path.html#os.path.getmtime
213+
* - https://docs.python.org/3/library/os.path.html#os.path.getctime
214+
* - https://docs.python.org/3/library/os.path.html#os.path.getsize
215+
* - https://docs.python.org/3/library/os.path.html#os.path.realpath
201216
*/
202-
private class OsPathNormpathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
203-
OsPathNormpathCall() { this = os::path().getMember("normpath").getACall() }
217+
private class OsPathProbingCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
218+
OsPathProbingCall() {
219+
this =
220+
os::path()
221+
.getMember([
222+
// these check if the file exists
223+
"exists", "lexists", "isfile", "isdir", "islink", "ismount",
224+
// these raise errors if the file does not exist
225+
"getatime", "getmtime", "getctime", "getsize"
226+
])
227+
.getACall()
228+
}
204229

205-
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
230+
override DataFlow::Node getAPathArgument() {
231+
result in [this.getArg(0), this.getArgByName("path")]
232+
}
233+
}
234+
235+
/** A call to `os.path.samefile` will raise an exception if an `os.stat()` call on either pathname fails. */
236+
private class OsPathSamefileCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
237+
OsPathSamefileCall() { this = os::path().getMember("samefile").getACall() }
238+
239+
override DataFlow::Node getAPathArgument() {
240+
result in [
241+
this.getArg(0), this.getArgByName("path1"), this.getArg(1), this.getArgByName("path2")
242+
]
243+
}
244+
}
245+
246+
// Functions with non-standard arguments:
247+
// - os.path.join(path, *paths)
248+
// - os.path.relpath(path, start=os.curdir)
249+
// these functions need special treatment when computing `getPathArg`.
250+
//
251+
// Functions that excluded because they can act as sanitizers:
252+
// - os.path.commonpath(paths): takes a sequence
253+
// - os.path.commonprefix(list): takes a list argument
254+
// unless the user control all arguments, we are comparing with a known value.
255+
private string pathComputation() {
256+
result in [
257+
"abspath", "basename", "commonpath", "dirname", "expanduser", "expandvars", "join",
258+
"normcase", "normpath", "realpath", "relpath", "split", "splitdrive", "splitext"
259+
]
206260
}
207261

208-
/** An additional taint step for calls to `os.path.normpath` */
209-
private class OsPathNormpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
262+
/**
263+
* The `os.path` module offers a number of methods for computing new paths from existing paths.
264+
* These should all propagate taint.
265+
*/
266+
private class OsPathComputation extends DataFlow::CallCfgNode {
267+
string methodName;
268+
269+
OsPathComputation() {
270+
methodName = pathComputation() and
271+
this = os::path().getMember(methodName).getACall()
272+
}
273+
274+
DataFlow::Node getPathArg() {
275+
result in [this.getArg(0), this.getArgByName("path")]
276+
or
277+
methodName = "join" and result = this.getArg(_)
278+
or
279+
methodName = "relpath" and result in [this.getArg(1), this.getArgByName("start")]
280+
}
281+
}
282+
283+
/** An additional taint step for path computations. */
284+
private class OsPathComputationAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
210285
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
211-
exists(OsPathNormpathCall call |
286+
exists(OsPathComputation call |
212287
nodeTo = call and
213288
nodeFrom = call.getPathArg()
214289
)
215290
}
216291
}
217292

293+
/**
294+
* A call to `os.path.normpath`.
295+
* See https://docs.python.org/3/library/os.path.html#os.path.normpath
296+
*/
297+
private class OsPathNormpathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
298+
OsPathNormpathCall() { this = os::path().getMember("normpath").getACall() }
299+
300+
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
301+
}
302+
218303
/**
219304
* A call to `os.path.abspath`.
220305
* See https://docs.python.org/3/library/os.path.html#os.path.abspath
@@ -225,16 +310,6 @@ private module StdlibPrivate {
225310
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
226311
}
227312

228-
/** An additional taint step for calls to `os.path.abspath` */
229-
private class OsPathAbspathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
230-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
231-
exists(OsPathAbspathCall call |
232-
nodeTo = call and
233-
nodeFrom = call.getPathArg()
234-
)
235-
}
236-
}
237-
238313
/**
239314
* A call to `os.path.realpath`.
240315
* See https://docs.python.org/3/library/os.path.html#os.path.realpath
@@ -245,16 +320,6 @@ private module StdlibPrivate {
245320
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
246321
}
247322

248-
/** An additional taint step for calls to `os.path.realpath` */
249-
private class OsPathRealpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
250-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
251-
exists(OsPathRealpathCall call |
252-
nodeTo = call and
253-
nodeFrom = call.getPathArg()
254-
)
255-
}
256-
}
257-
258323
/**
259324
* A call to `os.system`.
260325
* See https://docs.python.org/3/library/os.html#os.system

python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,10 @@ def through_function(open_file):
2727
open_file.write("foo") # $ fileWriteData="foo" getAPathArgument="path"
2828

2929
through_function(f)
30+
31+
from os import path
32+
path.exists("filepath") # $ getAPathArgument="filepath"
33+
path.isfile("filepath") # $ getAPathArgument="filepath"
34+
path.isdir("filepath") # $ getAPathArgument="filepath"
35+
path.islink("filepath") # $ getAPathArgument="filepath"
36+
path.ismount("filepath") # $ getAPathArgument="filepath"

0 commit comments

Comments
 (0)