Skip to content

Commit 2387dc6

Browse files
committed
Python: Attempts at modelling pathlib-Paths
1 parent 8489403 commit 2387dc6

File tree

1 file changed

+213
-0
lines changed

1 file changed

+213
-0
lines changed

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,219 @@ private module Stdlib {
864864
class Sqlite3 extends PEP249ModuleApiNode {
865865
// This class consists of exactly the node `API::moduleImport("sqlite3")`.
Sqlite3() { this = API::moduleImport("sqlite3") }
866866
}
867+
868+
// ---------------------------------------------------------------------------
869+
// pathlib
870+
// ---------------------------------------------------------------------------
871+
/**
 * Gets the `API::Node` that represents an import of the `pathlib` module.
 */
private API::Node pathlib() {
  result = API::moduleImport("pathlib")
}
873+
874+
/**
 * Gets the name of a `pathlib` class whose constructor creates a path object.
 *
 * The pure path classes are included too: a pure path can be "exported"
 * (say with `as_posix`) and the result then used to access the underlying file system.
 */
private string pathlibPathConstructor() {
  result =
    ["Path", "PurePath", "PurePosixPath", "PureWindowsPath", "PosixPath", "WindowsPath"]
}
881+
882+
/**
883+
* Gets a name of an attribute of a `pathlib.Path` object that is also a `pathlib.Path` object.
884+
*/
885+
private string pathlibPathAttribute() { result in ["parent"] }
886+
887+
/**
 * Gets the name of a method that, when called on a `pathlib.Path` object,
 * returns a `pathlib.Path` object.
 */
private string pathlibPathMethod() {
  result = ["absolute", "relative_to"]
}
891+
892+
/**
 * Gets the name of a `pathlib.Path` method that builds a modified `pathlib.Path`
 * object out of the receiver and newly supplied data.
 */
private string pathlibPathInjection() {
  result = ["joinpath", "with_name", "with_stem", "with_suffix"]
}
898+
899+
/**
 * Gets the name of a `pathlib.Path` attribute through which information about
 * the path object is exported.
 */
private string pathlibPathAttributeExport() {
  result = ["drive", "root", "anchor", "name", "suffix", "stem"]
}
905+
906+
/**
 * Gets the name of a `pathlib.Path` method through which information about
 * the path object is exported.
 */
private string pathlibPathMethodExport() {
  result = ["as_posix", "as_uri"]
}
910+
911+
/**
 * Gets a reference to a `pathlib.Path` object, tracked with type tracker `t`.
 *
 * Note that this type tracker makes the monomorphic API use assumption.
 */
private DataFlow::LocalSourceNode pathlibPath(DataFlow::TypeTracker t) {
  // Construction: a call to one of the modelled `pathlib` classes.
  t.start() and
  result = pathlib().getMember(pathlibPathConstructor()).getACall()
  or
  // A call, on a tracked path, to a method that returns a path.
  exists(DataFlow::AttrRead methodRead, DataFlow::TypeTracker done |
    methodRead.getAttributeName() = pathlibPathMethod() and
    methodRead.getObject().getALocalSource() = pathlibPath(done) and
    done.end()
  |
    t.start() and
    result.(DataFlow::CallCfgNode).getFunction() = methodRead
  )
  or
  // A read, on a tracked path, of an attribute that is itself a path.
  exists(DataFlow::AttrRead attrRead, DataFlow::TypeTracker done |
    attrRead.getAttributeName() = pathlibPathAttribute() and
    attrRead.getObject().getALocalSource() = pathlibPath(done) and
    done.end()
  |
    t.start() and
    result = attrRead
  )
  or
  // Data injection through the `/` operator: `path / other` is a path again.
  exists(
    BinaryExprNode division, DataFlow::Node rhs, DataFlow::Node lhs, DataFlow::TypeTracker done
  |
    division.getOp() instanceof Div and
    rhs.asCfgNode() = division.getRight() and
    lhs.asCfgNode() = division.getLeft() and
    lhs.getALocalSource() = pathlibPath(done) and
    done.end()
  |
    t.start() and
    result.asCfgNode() = division
  )
  or
  // Data injection through a method call such as `joinpath` or `with_name`.
  exists(DataFlow::AttrRead injectRead, DataFlow::TypeTracker done |
    injectRead.getAttributeName() = pathlibPathInjection() and
    injectRead.getObject().getALocalSource() = pathlibPath(done) and
    done.end()
  |
    t.start() and
    result.(DataFlow::CallCfgNode).getFunction() = injectRead
  )
  or
  // Due to bad performance when using normal setup with `path(t2).track(t2, t)`
  // we have inlined that code and forced a join
  exists(DataFlow::TypeTracker prev, DataFlow::StepSummary stepSummary |
    pathlibPath_first_join(prev, result, stepSummary) and
    t = prev.append(stepSummary)
  )
}
972+
973+
/**
 * Holds if `DataFlow::StepSummary::step` relates a node given by `pathlibPath(t2)`
 * to `res`, with `summary` describing that step.
 *
 * NOTE(review): marked `pragma[nomagic]`, presumably so that this join is evaluated
 * as a unit for performance reasons -- confirm before changing its shape.
 */
pragma[nomagic]
974+
private predicate pathlibPath_first_join(
975+
DataFlow::TypeTracker t2, DataFlow::Node res, DataFlow::StepSummary summary
976+
) {
977+
DataFlow::StepSummary::step(pathlibPath(t2), res, summary)
978+
}
979+
980+
/**
 * Gets a node that refers to a `pathlib.Path` object, that is, a result of the
 * type-tracking predicate `pathlibPath(t)` for the end state of the type tracker.
 */
DataFlow::LocalSourceNode pathlibPath() {
  result = pathlibPath(DataFlow::TypeTracker::end())
}
982+
983+
/**
 * A call to a method of a `pathlib.Path` object that accesses the file system.
 *
 * The path that is accessed is the object the method is called on.
 */
private class PathlibFileAccess extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  /** The attribute read (`path.<method>`) whose result this call invokes. */
  DataFlow::AttrRead fileAccess;

  PathlibFileAccess() {
    // Note: the directory-listing method is spelled `iterdir` (no underscore).
    fileAccess.getAttributeName() in [
        "stat", "chmod", "exists", "expanduser", "glob", "group", "is_dir", "is_file", "is_mount",
        "is_symlink", "is_socket", "is_fifo", "is_block_device", "is_char_device", "iterdir",
        "lchmod", "lstat", "mkdir", "open", "owner", "read_bytes", "read_text", "readlink",
        "rename", "replace", "resolve", "rglob", "rmdir", "samefile", "symlink_to", "touch",
        "unlink", "link_to", "write_bytes", "write_text"
      ] and
    // The method must be looked up on a tracked `pathlib.Path` object ...
    fileAccess.getObject().getALocalSource() = pathlibPath() and
    // ... and this call must be a call of that looked-up method.
    this.getFunction() = fileAccess
  }

  /** Gets the path being accessed: the receiver of the method call. */
  override DataFlow::Node getAPathArgument() { result = fileAccess.getObject() }
}
1000+
1001+
/** Additional taint steps for objects of type `pathlib.Path`. */
private class PathlibPathTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // Construction: any argument of a path constructor call taints the new path.
    nodeTo = pathlib().getMember(pathlibPathConstructor()).getACall() and
    nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(_)
    or
    // Path-to-path steps: both ends are tracked path objects.
    nodeFrom.getALocalSource() = pathlibPath() and
    nodeTo.getALocalSource() = pathlibPath() and
    (
      // From the receiver to the result of a path-returning method call.
      exists(DataFlow::AttrRead methodRead |
        methodRead.getAttributeName() = pathlibPathMethod()
      |
        nodeTo.(DataFlow::CallCfgNode).getFunction() = methodRead and
        nodeFrom = methodRead.getObject()
      )
      or
      // From the object to a path-valued attribute read on it.
      exists(DataFlow::AttrRead attrRead | attrRead.getAttributeName() = pathlibPathAttribute() |
        nodeTo = attrRead and
        nodeFrom = attrRead.getObject()
      )
    )
    or
    // Data injection: new data flows into a resulting path object.
    nodeTo.getALocalSource() = pathlibPath() and
    (
      // The `/` operator, where the left operand is a tracked path.
      exists(BinaryExprNode division, DataFlow::Node lhs |
        division.getOp() instanceof Div and
        lhs.asCfgNode() = division.getLeft() and
        lhs.getALocalSource() = pathlibPath()
      |
        nodeTo.asCfgNode() = division and
        (
          // from the existing path (left operand)
          nodeFrom = lhs
          or
          // from the injected data (right operand)
          nodeFrom.asCfgNode() = division.getRight()
        )
      )
      or
      // Injection through a method call such as `joinpath` or `with_name`.
      exists(DataFlow::AttrRead injectRead |
        injectRead.getAttributeName() = pathlibPathInjection()
      |
        nodeTo.(DataFlow::CallCfgNode).getFunction() = injectRead and
        (
          // from the existing path (the receiver)
          nodeFrom = injectRead.getObject()
          or
          // from the injected data (any argument)
          nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(_)
        )
      )
    )
    or
    // Export: information leaves a tracked path object.
    nodeFrom.getALocalSource() = pathlibPath() and
    (
      // through an attribute read
      exists(DataFlow::AttrRead exportRead |
        exportRead.getAttributeName() = pathlibPathAttributeExport()
      |
        nodeTo = exportRead and
        nodeFrom = exportRead.getObject()
      )
      or
      // through a method call
      exists(DataFlow::AttrRead exportRead |
        exportRead.getAttributeName() = pathlibPathMethodExport()
      |
        nodeTo.(DataFlow::CallCfgNode).getFunction() = exportRead and
        nodeFrom = exportRead.getObject()
      )
    )
  }
}
8671080
}
8681081

8691082
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)