Skip to content

Commit e1801f3

Browse files
committed
Python: Proper threat-model handling for argparse
1 parent 56c85ff commit e1801f3

File tree

3 files changed

+33
-5
lines changed

3 files changed

+33
-5
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.model.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,13 @@ extensions:
1212
- ['sys', 'Member[argv]', 'commandargs']
1313
- ['sys', 'Member[orig_argv]', 'commandargs']
1414

15-
# TODO: argparse
15+
# if no argument is given, the default is to use sys.argv[1:]
16+
- ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs']
17+
- addsTo:
18+
pack: codeql/python-all
19+
extensible: summaryModel
20+
data:
21+
- ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint']
22+
# note: taint of attribute lookups is handled in QL
23+
1624
# TODO: input / read from stdin

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4989,6 +4989,26 @@ module StdlibPrivate {
49894989

49904990
override string getKind() { result = Escaping::getHtmlKind() }
49914991
}
4992+
4993+
// ---------------------------------------------------------------------------
4994+
// argparse
4995+
// ---------------------------------------------------------------------------
4996+
/**
4997+
* An additional taint step for `argparse`: if the result of `parse_args` is tainted
* (because it uses command-line arguments),
4998+
* then the parsed values accessed through any attribute lookup on that result are also tainted.
*
* Note: only the return value of `parse_args` on an `argparse.ArgumentParser`
* instance is matched by this step; `parse_known_args` is not matched here.
4999+
*/
5000+
private class ArgumentParserAnyAttributeStep extends TaintTracking::AdditionalTaintStep {
5001+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
5002+
// `nodeFrom`: any value reachable from the result of `argparse.ArgumentParser(...).parse_args(...)`
nodeFrom =
5003+
API::moduleImport("argparse")
5004+
.getMember("ArgumentParser")
5005+
.getReturn()
5006+
.getMember("parse_args")
5007+
.getReturn()
5008+
.getAValueReachableFromSource() and
5009+
// `nodeTo`: an attribute read whose object is `nodeFrom` (e.g. `args.foo`)
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
5010+
}
5011+
}
49925012
}
49935013

49945014
// ---------------------------------------------------------------------------

python/ql/test/library-tests/frameworks/stdlib/threat_models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@
3030
parser = argparse.ArgumentParser()
3131
parser.add_argument("foo")
3232

33-
args = parser.parse_args() # $ MISSING: threatModelSource[commandargs]=parser.parse_args()
34-
ensure_tainted(args.foo) # $ MISSING: tainted
33+
args = parser.parse_args() # $ threatModelSource[commandargs]=parser.parse_args()
34+
ensure_tainted(args.foo) # $ tainted
3535

3636
explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv
37-
ensure_tainted(explicit_argv_parsing.foo) # $ MISSING: tainted
37+
ensure_tainted(explicit_argv_parsing.foo) # $ tainted
3838

3939
fake_args = parser.parse_args(["<foo>"])
40-
ensure_not_tainted(fake_args.foo)
40+
ensure_not_tainted(fake_args.foo) # $ SPURIOUS: tainted
4141

4242
########################################
4343
# reading input from stdin

0 commit comments

Comments
 (0)