Skip to content

Commit 39ec870

Browse files
committed
Python: Add FileSystemWriteAccess concept
I made `FileSystemWriteAccess` be a subclass of `FileSystemAccess` (like in [JS](https://github.com/github/codeql/blob/64001cc02cd67cc5df003199954be0d4135b3eb6/javascript/ql/src/semmle/javascript/Concepts.qll#L68-L74)), but then I started wondering about how I could give a good result for `getAPathArgument`, and what would a good result even be? The argument to the `open` call, or the object that the `write` method is called on? I can't see how doing either of these enables us to do anything useful... So I looked closer at how JS uses `FileSystemWriteAccess`: 1. as sink for zip-slip: https://github.com/github/codeql/blob/7c51dff0f781a0ca1efe32ef449b7b1b88cf0548/javascript/ql/src/semmle/javascript/security/dataflow/ZipSlipCustomizations.qll#L121 2. as sink for downloading unsafe files (identified through their extension) through non-secure connections: https://github.com/github/codeql/blob/89ef6ea4eb1d7f606380fc9cd904ac2b540c8eae/javascript/ql/src/semmle/javascript/security/dataflow/InsecureDownloadCustomizations.qll#L134-L150 3. as sink for writing untrusted data to a local file https://github.com/github/codeql/blob/93b1e59d62a596fe40ebce13ba2dd8e6117b25fd/javascript/ql/src/semmle/javascript/security/dataflow/HttpToFileAccessCustomizations.qll#L43-L46 for the 2 first sinks, it's important that `getAPathArgument` has a proper result... so that solves the problem, and highlights that it _can_ be important to give proper results for `getAPathArgument` (if possible). So I'm trying to do best effort for `f = open(...); f.write(...)`, but with this current code we won't always be able to give a result (as highlighted by the tests). It will also be the case that there are multiple `FileSystemAccess` with the same path-argument, which could be a little strange. overall, I'm not super confident about the way this new concept and implementation turned out, but it also seems like the best I could come up with right now... The obvious alternative solution is to NOT make `FileSystemWriteAccess` a subclass of `FileSystemAccess`, but I'm not very tempted to go down this path, given the examples of this being useful above, and just the general notion that we should be able to model writes as being a specialized kind of `FileSystemAccess`.
1 parent 6a6d6fb commit 39ec870

File tree

5 files changed

+104
-9
lines changed

5 files changed

+104
-9
lines changed

python/ql/src/semmle/python/Concepts.qll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,39 @@ module FileSystemAccess {
7272
}
7373
}
7474

75+
/**
76+
* A data flow node that writes data to the file system access.
77+
*
78+
* Extend this class to refine existing API models. If you want to model new APIs,
79+
* extend `FileSystemWriteAccess::Range` instead.
80+
*/
81+
class FileSystemWriteAccess extends FileSystemAccess {
82+
override FileSystemWriteAccess::Range range;
83+
84+
/**
85+
* Gets a node that represents data to be written to the file system (possibly with
86+
* some transformation happening before it is written, like JSON encoding).
87+
*/
88+
DataFlow::Node getADataNode() { result = range.getADataNode() }
89+
}
90+
91+
/** Provides a class for modeling new file system writes. */
92+
module FileSystemWriteAccess {
93+
/**
94+
* A data flow node that writes data to the file system access.
95+
*
96+
* Extend this class to model new APIs. If you want to refine existing API models,
97+
* extend `FileSystemWriteAccess` instead.
98+
*/
99+
abstract class Range extends FileSystemAccess::Range {
100+
/**
101+
* Gets a node that represents data to be written to the file system (possibly with
102+
* some transformation happening before it is written, like JSON encoding).
103+
*/
104+
abstract DataFlow::Node getADataNode();
105+
}
106+
}
107+
75108
/** Provides classes for modeling path-related APIs. */
76109
module Path {
77110
/**

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -383,23 +383,45 @@ private module Stdlib {
383383
}
384384
}
385385

386+
/** Gets a reference to the builtin `open` function */
387+
private API::Node getOpenFunctionRef() {
388+
result = API::builtin("open")
389+
or
390+
// io.open is a special case, since it is an alias for the builtin `open`
391+
result = API::moduleImport("io").getMember("open")
392+
}
393+
386394
/**
387395
* A call to the builtin `open` function.
388396
* See https://docs.python.org/3/library/functions.html#open
389397
*/
390398
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
391-
OpenCall() {
392-
this = API::builtin("open").getACall()
393-
or
394-
// io.open is a special case, since it is an alias for the builtin `open`
395-
this = API::moduleImport("io").getMember("open").getACall()
396-
}
399+
OpenCall() { this = getOpenFunctionRef().getACall() }
397400

398401
override DataFlow::Node getAPathArgument() {
399402
result in [this.getArg(0), this.getArgByName("file")]
400403
}
401404
}
402405

406+
/** A call to the `write` or `writelines` method on an opened file, such as `open("foo", "w").write(...)`. */
407+
private class WriteOnOpenFile extends FileSystemWriteAccess::Range, DataFlow::CallCfgNode {
408+
WriteOnOpenFile() {
409+
this = getOpenFunctionRef().getReturn().getMember(["write", "writelines"]).getACall()
410+
}
411+
412+
override DataFlow::Node getAPathArgument() {
413+
// best effort attempt to give the path argument, that was initially given to the `open` call.
414+
exists(OpenCall openCall, DataFlow::AttrRead read |
415+
read.getAttributeName() in ["write", "writelines"] and
416+
openCall.flowsTo(read.getObject()) and
417+
read.(DataFlow::LocalSourceNode).flowsTo(this.getFunction()) and
418+
result = openCall.getAPathArgument()
419+
)
420+
}
421+
422+
override DataFlow::Node getADataNode() { result in [this.getArg(0), this.getArgByName("data")] }
423+
}
424+
403425
/**
404426
* An exec statement (only Python 2).
405427
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
@@ -1001,11 +1023,14 @@ private module Stdlib {
10011023
/** Gets a reference to a `pathlib.Path` object. */
10021024
DataFlow::LocalSourceNode pathlibPath() { result = pathlibPath(DataFlow::TypeTracker::end()) }
10031025

1026+
/** A file system access from a `pathlib.Path` method call. */
10041027
private class PathlibFileAccess extends FileSystemAccess::Range, DataFlow::CallCfgNode {
10051028
DataFlow::AttrRead fileAccess;
1029+
string attrbuteName;
10061030

10071031
PathlibFileAccess() {
1008-
fileAccess.getAttributeName() in [
1032+
attrbuteName = fileAccess.getAttributeName() and
1033+
attrbuteName in [
10091034
"stat", "chmod", "exists", "expanduser", "glob", "group", "is_dir", "is_file", "is_mount",
10101035
"is_symlink", "is_socket", "is_fifo", "is_block_device", "is_char_device", "iter_dir",
10111036
"lchmod", "lstat", "mkdir", "open", "owner", "read_bytes", "read_text", "readlink",
@@ -1019,6 +1044,13 @@ private module Stdlib {
10191044
override DataFlow::Node getAPathArgument() { result = fileAccess.getObject() }
10201045
}
10211046

1047+
/** A file system write from a `pathlib.Path` method call. */
1048+
private class PathlibFileWrites extends PathlibFileAccess, FileSystemWriteAccess::Range {
1049+
PathlibFileWrites() { attrbuteName in ["write_bytes", "write_text"] }
1050+
1051+
override DataFlow::Node getADataNode() { result in [this.getArg(0), this.getArgByName("data")] }
1052+
}
1053+
10221054
/** An additional taint steps for objects of type `pathlib.Path` */
10231055
private class PathlibPathTaintStep extends TaintTracking::AdditionalTaintStep {
10241056
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {

python/ql/test/experimental/meta/ConceptsTest.qll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,23 @@ class FileSystemAccessTest extends InlineExpectationsTest {
269269
}
270270
}
271271

272+
class FileSystemWriteAccessTest extends InlineExpectationsTest {
273+
FileSystemWriteAccessTest() { this = "FileSystemWriteAccessTest" }
274+
275+
override string getARelevantTag() { result = "fileWriteData" }
276+
277+
override predicate hasActualResult(Location location, string element, string tag, string value) {
278+
exists(location.getFile().getRelativePath()) and
279+
exists(FileSystemWriteAccess write, DataFlow::Node data |
280+
data = write.getADataNode() and
281+
location = data.getLocation() and
282+
element = data.toString() and
283+
value = prettyNodeForInlineTest(data) and
284+
tag = "fileWriteData"
285+
)
286+
}
287+
}
288+
272289
class PathNormalizationTest extends InlineExpectationsTest {
273290
PathNormalizationTest() { this = "PathNormalizationTest" }
274291

python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
with p.open() as f: # $ getAPathArgument=p
1212
f.read()
1313

14-
p.write_bytes(b"hello") # $ getAPathArgument=p
14+
p.write_bytes(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"
15+
p.write_text("hello") # $ getAPathArgument=p fileWriteData="hello"
16+
p.open("wt").write("hello") # $ getAPathArgument=p MISSING: fileWriteData="hello"
1517

1618
name = windows.parent.name
1719
o = open
1820
o(name) # $ getAPathArgument=name
1921

2022
wb = p.write_bytes
21-
wb(b"hello") # $ getAPathArgument=p
23+
wb(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"

python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,14 @@
1616

1717
io.open("filepath") # $ getAPathArgument="filepath"
1818
io.open(file="filepath") # $ getAPathArgument="filepath"
19+
20+
f = open("path") # $ getAPathArgument="path"
21+
f.write("foo") # $ getAPathArgument="path" fileWriteData="foo"
22+
lines = ["foo"]
23+
f.writelines(lines) # $ getAPathArgument="path" fileWriteData=lines
24+
25+
26+
def through_function(open_file):
27+
open_file.write("foo") # $ fileWriteData="foo" MISSING: getAPathArgument="path"
28+
29+
through_function(f)

0 commit comments

Comments
 (0)