Skip to content

Commit 95f26aa

Browse files
authored
Merge pull request github#5681 from yoff/python-support-pathlib
Approved by tausbn
2 parents 5ee74d2 + 16bde27 commit 95f26aa

File tree

6 files changed

+226
-15
lines changed

6 files changed

+226
-15
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of `pathlib` from the standard library to recognize `Path` objects constructed in various ways and resulting file accesses. This can lead to new results for `py/path-injection`.

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,194 @@ private module Stdlib {
864864
class Sqlite3 extends PEP249ModuleApiNode {
865865
Sqlite3() { this = API::moduleImport("sqlite3") }
866866
}
867+
868+
// ---------------------------------------------------------------------------
869+
// pathlib
870+
// ---------------------------------------------------------------------------
871+
/** Gets a reference to the `pathlib` module. */
872+
private API::Node pathlib() { result = API::moduleImport("pathlib") }
873+
874+
/**
875+
* Gets a name of a constructor for a `pathlib.Path` object.
876+
* We include the pure paths, as they can be "exported" (say with `as_posix`) and then used to acces the underlying file system.
877+
*/
878+
private string pathlibPathConstructor() {
879+
result in ["Path", "PurePath", "PurePosixPath", "PureWindowsPath", "PosixPath", "WindowsPath"]
880+
}
881+
882+
/**
883+
* Gets a name of an attribute of a `pathlib.Path` object that is also a `pathlib.Path` object.
884+
*/
885+
private string pathlibPathAttribute() { result in ["parent"] }
886+
887+
/**
888+
* Gets a name of a method of a `pathlib.Path` object that returns a `pathlib.Path` object.
889+
*/
890+
private string pathlibPathMethod() {
891+
result in ["absolute", "relative_to", "rename", "replace", "resolve"]
892+
}
893+
894+
/**
895+
* Gets a name of a method of a `pathlib.Path` object that modifies a `pathlib.Path` object based on new data.
896+
*/
897+
private string pathlibPathInjection() {
898+
result in ["joinpath", "with_name", "with_stem", "with_suffix"]
899+
}
900+
901+
/**
902+
* Gets a name of an attribute of a `pathlib.Path` object that exports information about the `pathlib.Path` object.
903+
*/
904+
private string pathlibPathAttributeExport() {
905+
result in ["drive", "root", "anchor", "name", "suffix", "stem"]
906+
}
907+
908+
/**
909+
* Gets a name of a method of a `pathlib.Path` object that exports information about the `pathlib.Path` object.
910+
*/
911+
private string pathlibPathMethodExport() { result in ["as_posix", "as_uri"] }
912+
913+
/**
914+
* Flow for attributes and methods that return a `pathlib.Path` object.
915+
*/
916+
private predicate pathlibPathStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
917+
exists(DataFlow::AttrRead returnsPath |
918+
(
919+
// attribute access
920+
returnsPath.getAttributeName() = pathlibPathAttribute() and
921+
nodeTo = returnsPath
922+
or
923+
// method call
924+
returnsPath.getAttributeName() = pathlibPathMethod() and
925+
returnsPath
926+
.(DataFlow::LocalSourceNode)
927+
.flowsTo(nodeTo.(DataFlow::CallCfgNode).getFunction())
928+
) and
929+
nodeFrom = returnsPath.getObject()
930+
)
931+
}
932+
933+
/**
934+
* Gets a reference to a `pathlib.Path` object.
935+
* This type tracker makes the monomorphic API use assumption.
936+
*/
937+
private DataFlow::LocalSourceNode pathlibPath(DataFlow::TypeTracker t) {
938+
// Type construction
939+
t.start() and
940+
result = pathlib().getMember(pathlibPathConstructor()).getACall()
941+
or
942+
// Type-preserving step
943+
exists(DataFlow::Node nodeFrom, DataFlow::TypeTracker t2 |
944+
pathlibPath(t2).flowsTo(nodeFrom) and
945+
t2.end()
946+
|
947+
t.start() and
948+
pathlibPathStep(nodeFrom, result)
949+
)
950+
or
951+
// Data injection
952+
// Special handling of the `/` operator
953+
exists(BinaryExprNode slash, DataFlow::Node pathOperand, DataFlow::TypeTracker t2 |
954+
slash.getOp() instanceof Div and
955+
pathOperand.asCfgNode() = slash.getAnOperand() and
956+
pathlibPath(t2).flowsTo(pathOperand) and
957+
t2.end()
958+
|
959+
t.start() and
960+
result.asCfgNode() = slash
961+
)
962+
or
963+
// standard case
964+
exists(DataFlow::AttrRead returnsPath, DataFlow::TypeTracker t2 |
965+
returnsPath.getAttributeName() = pathlibPathInjection() and
966+
pathlibPath(t2).flowsTo(returnsPath.getObject()) and
967+
t2.end()
968+
|
969+
t.start() and
970+
result.(DataFlow::CallCfgNode).getFunction() = returnsPath
971+
)
972+
or
973+
// Track further
974+
exists(DataFlow::TypeTracker t2 | result = pathlibPath(t2).track(t2, t))
975+
}
976+
977+
/** Gets a reference to a `pathlib.Path` object. */
978+
DataFlow::LocalSourceNode pathlibPath() { result = pathlibPath(DataFlow::TypeTracker::end()) }
979+
980+
private class PathlibFileAccess extends FileSystemAccess::Range, DataFlow::CallCfgNode {
981+
DataFlow::AttrRead fileAccess;
982+
983+
PathlibFileAccess() {
984+
fileAccess.getAttributeName() in [
985+
"stat", "chmod", "exists", "expanduser", "glob", "group", "is_dir", "is_file", "is_mount",
986+
"is_symlink", "is_socket", "is_fifo", "is_block_device", "is_char_device", "iter_dir",
987+
"lchmod", "lstat", "mkdir", "open", "owner", "read_bytes", "read_text", "readlink",
988+
"rename", "replace", "resolve", "rglob", "rmdir", "samefile", "symlink_to", "touch",
989+
"unlink", "link_to", "write_bytes", "write_text"
990+
] and
991+
pathlibPath().flowsTo(fileAccess.getObject()) and
992+
fileAccess.(DataFlow::LocalSourceNode).flowsTo(this.getFunction())
993+
}
994+
995+
override DataFlow::Node getAPathArgument() { result = fileAccess.getObject() }
996+
}
997+
998+
/** An additional taint steps for objects of type `pathlib.Path` */
999+
private class PathlibPathTaintStep extends TaintTracking::AdditionalTaintStep {
1000+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
1001+
// Type construction
1002+
nodeTo = pathlib().getMember(pathlibPathConstructor()).getACall() and
1003+
nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(_)
1004+
or
1005+
// Type preservation
1006+
pathlibPath().flowsTo(nodeFrom) and
1007+
pathlibPathStep(nodeFrom, nodeTo)
1008+
or
1009+
// Data injection
1010+
pathlibPath().flowsTo(nodeTo) and
1011+
(
1012+
// Special handling of the `/` operator
1013+
exists(BinaryExprNode slash, DataFlow::Node pathOperand |
1014+
slash.getOp() instanceof Div and
1015+
pathOperand.asCfgNode() = slash.getAnOperand() and
1016+
pathlibPath().flowsTo(pathOperand)
1017+
|
1018+
nodeTo.asCfgNode() = slash and
1019+
// Taint can flow either from the left or the right operand as long as one of them is a path.
1020+
nodeFrom.asCfgNode() = slash.getAnOperand()
1021+
)
1022+
or
1023+
// standard case
1024+
exists(DataFlow::AttrRead augmentsPath |
1025+
augmentsPath.getAttributeName() = pathlibPathInjection()
1026+
|
1027+
augmentsPath
1028+
.(DataFlow::LocalSourceNode)
1029+
.flowsTo(nodeTo.(DataFlow::CallCfgNode).getFunction()) and
1030+
(
1031+
// type-preserving call
1032+
nodeFrom = augmentsPath.getObject()
1033+
or
1034+
// data injection
1035+
nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(_)
1036+
)
1037+
)
1038+
)
1039+
or
1040+
// Export data from type
1041+
pathlibPath().flowsTo(nodeFrom) and
1042+
exists(DataFlow::AttrRead exportPath |
1043+
// exporting attribute
1044+
exportPath.getAttributeName() = pathlibPathAttributeExport() and
1045+
nodeTo = exportPath
1046+
or
1047+
// exporting method
1048+
exportPath.getAttributeName() = pathlibPathMethodExport() and
1049+
exportPath.(DataFlow::LocalSourceNode).flowsTo(nodeTo.(DataFlow::CallCfgNode).getFunction())
1050+
|
1051+
nodeFrom = exportPath.getObject()
1052+
)
1053+
}
1054+
}
8671055
}
8681056

8691057
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_pathlib.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,20 @@ def test_basic():
2323
tainted_pure_windows_path = pathlib.PureWindowsPath(ts)
2424

2525
ensure_tainted(
26-
tainted_path, # $ MISSING: tainted
26+
tainted_path, # $ tainted
2727

28-
tainted_pure_path, # $ MISSING: tainted
29-
tainted_pure_posix_path, # $ MISSING: tainted
30-
tainted_pure_windows_path, # $ MISSING: tainted
28+
tainted_pure_path, # $ tainted
29+
tainted_pure_posix_path, # $ tainted
30+
tainted_pure_windows_path, # $ tainted
3131

32-
pathlib.Path("foo") / ts, # $ MISSING: tainted
33-
ts / pathlib.Path("foo"), # $ MISSING: tainted
32+
pathlib.Path("foo") / ts, # $ tainted
33+
ts / pathlib.Path("foo"), # $ tainted
3434

35-
tainted_path.joinpath("foo", "bar"), # $ MISSING: tainted
36-
pathlib.Path("foo").joinpath(tainted_path, "bar"), # $ MISSING: tainted
37-
pathlib.Path("foo").joinpath("bar", tainted_path), # $ MISSING: tainted
35+
tainted_path.joinpath("foo", "bar"), # $ tainted
36+
pathlib.Path("foo").joinpath(tainted_path, "bar"), # $ tainted
37+
pathlib.Path("foo").joinpath("bar", tainted_path), # $ tainted
3838

39-
str(tainted_path), # $ MISSING: tainted
39+
str(tainted_path), # $ tainted
4040

4141
# TODO: Tainted methods and attributes
4242
# https://docs.python.org/3.8/library/pathlib.html#methods-and-properties
@@ -46,13 +46,13 @@ def test_basic():
4646
tainted_posix_path = pathlib.PosixPath(ts)
4747

4848
ensure_tainted(
49-
tainted_posix_path, # $ MISSING: tainted
49+
tainted_posix_path, # $ tainted
5050
)
5151

5252
if os.name == "nt":
5353
tainted_windows_path = pathlib.WindowsPath(ts)
5454
ensure_tainted(
55-
tainted_windows_path, # $ MISSING: tainted
55+
tainted_windows_path, # $ tainted
5656
)
5757

5858
# Make tests runable

python/ql/test/library-tests/frameworks/django-v2-v3/testproj/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pathlib import Path
1414

1515
# Build paths inside the project like this: BASE_DIR / 'subdir'.
16-
BASE_DIR = Path(__file__).resolve().parent.parent
16+
BASE_DIR = Path(__file__).resolve().parent.parent #$ getAPathArgument=Path()
1717

1818

1919
# Quick-start development settings - unsuitable for production

python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,24 @@
1616

1717
io.open("filepath") # $getAPathArgument="filepath"
1818
io.open(file="filepath") # $getAPathArgument="filepath"
19+
20+
from pathlib import Path, PosixPath, WindowsPath
21+
22+
p = Path("filepath")
23+
posix = PosixPath("posix/filepath")
24+
windows = WindowsPath("windows/filepath")
25+
26+
p.chmod(0o777) # $getAPathArgument=p
27+
posix.chmod(0o777) # $getAPathArgument=posix
28+
windows.chmod(0o777) # $getAPathArgument=windows
29+
30+
with p.open() as f: # $getAPathArgument=p
31+
f.read()
32+
33+
p.write_bytes(b"hello") # $getAPathArgument=p
34+
35+
name = windows.parent.name
36+
o(name) # $getAPathArgument=name
37+
38+
wb = p.write_bytes
39+
wb(b"hello") # $getAPathArgument=p

python/ql/test/library-tests/frameworks/stdlib/SafeAccessCheck.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
if s.startswith("tainted"): # $checks=s branch=true
44
pass
55

6-
sw = s.startswith # $ MISSING: checks=s branch=true
7-
if sw("safe"):
6+
sw = s.startswith
7+
if sw("safe"): # $ MISSING: checks=s branch=true
88
pass

0 commit comments

Comments
 (0)