Skip to content

Commit 5795c72

Browse files
committed
added inline tests
1 parent 6ebdae3 commit 5795c72

File tree

5 files changed

+150
-89
lines changed

5 files changed

+150
-89
lines changed

python/ql/src/experimental/Security/CWE-409/DecompressionBombs.ql

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -18,57 +18,6 @@ import semmle.python.ApiGraphs
1818
import semmle.python.dataflow.new.RemoteFlowSources
1919
import semmle.python.dataflow.new.internal.DataFlowPublic
2020
import experimental.semmle.python.security.DecompressionBomb
21-
import FileAndFormRemoteFlowSource::FileAndFormRemoteFlowSource
22-
23-
/**
24-
* `io.TextIOWrapper(ip, encoding='utf-8')` like following:
25-
* ```python
26-
* with gzip.open(bomb_input, 'rb') as ip:
27-
* with io.TextIOWrapper(ip, encoding='utf-8') as decoder:
28-
* content = decoder.read()
29-
* print(content)
30-
* ```
31-
* I saw this builtin method many places so I added it as a AdditionalTaintStep.
32-
* it would be nice if it is added as a global AdditionalTaintStep
33-
*/
34-
predicate isAdditionalTaintStepTextIOWrapper(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
35-
exists(API::CallNode textIOWrapper |
36-
textIOWrapper = API::moduleImport("io").getMember("TextIOWrapper").getACall()
37-
|
38-
nodeFrom = textIOWrapper.getParameter(0, "input").asSink() and
39-
nodeTo = textIOWrapper
40-
)
41-
}
42-
43-
module BombsConfig implements DataFlow::ConfigSig {
44-
predicate isSource(DataFlow::Node source) {
45-
(
46-
source instanceof RemoteFlowSource
47-
or
48-
source instanceof FastAPI
49-
) and
50-
not source.getLocation().getFile().inStdlib() and
51-
not source.getLocation().getFile().getRelativePath().matches("%venv%")
52-
}
53-
54-
predicate isSink(DataFlow::Node sink) {
55-
sink instanceof DecompressionBomb::Sink and
56-
not sink.getLocation().getFile().inStdlib() and
57-
not sink.getLocation().getFile().getRelativePath().matches("%venv%")
58-
}
59-
60-
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
61-
(
62-
any(DecompressionBomb::AdditionalTaintStep a).isAdditionalTaintStep(pred, succ) or
63-
isAdditionalTaintStepTextIOWrapper(pred, succ)
64-
) and
65-
not succ.getLocation().getFile().inStdlib() and
66-
not succ.getLocation().getFile().getRelativePath().matches("%venv%")
67-
}
68-
}
69-
70-
module BombsFlow = TaintTracking::Global<BombsConfig>;
71-
7221
import BombsFlow::PathGraph
7322

7423
from BombsFlow::PathNode source, BombsFlow::PathNode sink

python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import semmle.python.dataflow.new.TaintTracking
44
import semmle.python.ApiGraphs
55
import semmle.python.dataflow.new.RemoteFlowSources
66
import semmle.python.dataflow.new.internal.DataFlowPublic
7+
import FileAndFormRemoteFlowSource::FileAndFormRemoteFlowSource
78

89
module DecompressionBomb {
910
/**
@@ -358,3 +359,42 @@ module Lzma {
358359
}
359360
}
360361
}
362+
363+
/**
364+
* `io.TextIOWrapper(ip, encoding='utf-8')` like following:
365+
* ```python
366+
* with gzip.open(bomb_input, 'rb') as ip:
367+
* with io.TextIOWrapper(ip, encoding='utf-8') as decoder:
368+
* content = decoder.read()
369+
* print(content)
370+
* ```
371+
* I saw this builtin used in many places, so I added it as an AdditionalTaintStep.
372+
* It would be nice if it were added as a global AdditionalTaintStep.
373+
*/
374+
predicate isAdditionalTaintStepTextIOWrapper(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
375+
exists(API::CallNode textIOWrapper |
376+
textIOWrapper = API::moduleImport("io").getMember("TextIOWrapper").getACall()
377+
|
378+
nodeFrom = textIOWrapper.getParameter(0, "input").asSink() and
379+
nodeTo = textIOWrapper
380+
)
381+
}
382+
383+
module BombsConfig implements DataFlow::ConfigSig {
384+
predicate isSource(DataFlow::Node source) {
385+
source instanceof RemoteFlowSource
386+
or
387+
source instanceof FastAPI
388+
}
389+
390+
predicate isSink(DataFlow::Node sink) { sink instanceof DecompressionBomb::Sink }
391+
392+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
393+
(
394+
any(DecompressionBomb::AdditionalTaintStep a).isAdditionalTaintStep(pred, succ) or
395+
isAdditionalTaintStepTextIOWrapper(pred, succ)
396+
)
397+
}
398+
}
399+
400+
module BombsFlow = TaintTracking::Global<BombsConfig>;
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import python
2+
import semmle.python.dataflow.new.DataFlow
3+
import semmle.python.dataflow.new.TaintTracking
4+
import semmle.python.ApiGraphs
5+
6+
/**
7+
* Provides user-controllable remote flow sources for file uploads and multipart forms.
8+
*/
9+
module FileAndFormRemoteFlowSource {
10+
/**
11+
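* A data-flow node for user-controlled data read from a `fastapi.UploadFile` parameter of a FastAPI `post` route handler (`filename`, `content_type`, `headers`, `file`, or `read`).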
12+
*/
13+
class FastAPI extends DataFlow::Node {
14+
FastAPI() {
15+
exists(API::Node fastApiParam, Expr fastApiUploadFile |
16+
fastApiParam =
17+
API::moduleImport("fastapi")
18+
.getMember("FastAPI")
19+
.getReturn()
20+
.getMember("post")
21+
.getReturn()
22+
.getParameter(0)
23+
.getKeywordParameter(_) and
24+
fastApiUploadFile =
25+
API::moduleImport("fastapi")
26+
.getMember("UploadFile")
27+
.getASubclass*()
28+
.getAValueReachableFromSource()
29+
.asExpr()
30+
|
31+
// Multiple uploaded files as list of fastapi.UploadFile
32+
// @app.post("/")
33+
// def upload(files: List[UploadFile] = File(...)):
34+
// for file in files:
35+
fastApiUploadFile =
36+
fastApiParam.asSource().asExpr().(Parameter).getAnnotation().getASubExpression*() and
37+
exists(For f, Attribute attr |
38+
fastApiParam.getAValueReachableFromSource().asExpr() = f.getIter().getASubExpression*()
39+
|
40+
TaintTracking::localExprTaint(f.getIter(), attr.getObject()) and
41+
attr.getName() = ["filename", "content_type", "headers", "file", "read"] and
42+
this.asExpr() = attr
43+
)
44+
or
45+
// One uploaded file as fastapi.UploadFile
46+
// @app.post("/zipbomb2")
47+
// async def zipbomb2(file: UploadFile):
48+
// print(file.filename)
49+
this =
50+
[
51+
fastApiParam.getMember(["filename", "content_type", "headers"]).asSource(),
52+
fastApiParam
53+
.getMember("file")
54+
.getMember(["readlines", "readline", "read"])
55+
.getReturn()
56+
.asSource(), fastApiParam.getMember("read").getReturn().asSource()
57+
]
58+
)
59+
}
60+
61+
string getSourceType() { result = "fastapi HTTP FORM files" }
62+
}
63+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import python
2+
import experimental.dataflow.TestUtil.DataflowQueryTest
3+
import experimental.semmle.python.security.DecompressionBomb
4+
import FromTaintTrackingConfig<BombsConfig>

python/ql/test/experimental/query-tests/Security/CWE-409/test.py

Lines changed: 43 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,70 +7,75 @@
77

88
@app.post("/bomb")
99
async def bomb(file_path):
10-
zipfile.ZipFile(file_path, "r").extract("file1")
11-
zipfile.ZipFile(file_path, "r").extractall()
10+
zipfile.ZipFile(file_path, "r").extract("file1") # $ result=BAD
11+
zipfile.ZipFile(file_path, "r").extractall() # $ result=BAD
1212

1313
with zipfile.ZipFile(file_path) as myzip:
14-
with myzip.open('ZZ') as myfile:
14+
with myzip.open('ZZ') as myfile: # $ result=BAD
1515
a = myfile.readline()
1616

1717
with zipfile.ZipFile(file_path) as myzip:
18-
with myzip.open('ZZ', mode="w") as myfile:
18+
with myzip.open('ZZ', mode="w") as myfile: # $result=OK
1919
myfile.write(b"tmpppp")
2020

21-
zipfile.ZipFile(file_path).read("aFileNameInTheZipFile")
21+
zipfile.ZipFile(file_path).read("aFileNameInTheZipFile") # $ result=BAD
2222

23-
tarfile.open(file_path).extractfile("file1.txt")
24-
tarfile.TarFile.open(file_path).extract("somefile")
25-
tarfile.TarFile.xzopen(file_path).extract("somefile")
26-
tarfile.TarFile.gzopen(file_path).extractall()
27-
tarfile.TarFile.open(file_path).extractfile("file1.txt")
23+
tarfile.open(file_path).extractfile("file1.txt") # $ result=BAD
24+
tarfile.TarFile.open(file_path).extract("somefile") # $ result=BAD
25+
tarfile.TarFile.xzopen(file_path).extract("somefile") # $ result=BAD
26+
tarfile.TarFile.gzopen(file_path).extractall() # $ result=BAD
27+
tarfile.TarFile.open(file_path).extractfile("file1.txt") # $ result=BAD
2828

29-
tarfile.open(file_path, mode="w")
30-
tarfile.TarFile.gzopen(file_path, mode="w")
31-
tarfile.TarFile.open(file_path, mode="r:")
29+
tarfile.open(file_path, mode="w") # $result=OK
30+
tarfile.TarFile.gzopen(file_path, mode="w") # $result=OK
31+
tarfile.TarFile.open(file_path, mode="r:") # $ result=BAD
3232
import shutil
3333

34-
shutil.unpack_archive(file_path)
34+
shutil.unpack_archive(file_path) # $ result=BAD
3535

3636
import lzma
3737

38-
lzma.open(file_path)
39-
lzma.LZMAFile(file_path).read()
38+
lzma.open(file_path) # $ result=BAD
39+
lzma.LZMAFile(file_path).read() # $ result=BAD
4040

4141
import bz2
4242

43-
bz2.open(file_path)
44-
bz2.BZ2File(file_path).read()
43+
bz2.open(file_path) # $ result=BAD
44+
bz2.BZ2File(file_path).read() # $ result=BAD
4545

4646
import gzip
4747

48-
gzip.open(file_path)
49-
gzip.GzipFile(file_path)
48+
gzip.open(file_path) # $ result=BAD
49+
gzip.GzipFile(file_path) # $ result=BAD
5050

5151
import pandas
5252

53-
pandas.read_csv(filepath_or_buffer=file_path)
53+
pandas.read_csv(filepath_or_buffer=file_path) # $ result=BAD
5454

55-
pandas.read_table(file_path, compression='gzip')
56-
pandas.read_xml(file_path, compression='gzip')
55+
pandas.read_table(file_path, compression='gzip') # $ result=BAD
56+
pandas.read_xml(file_path, compression='gzip') # $ result=BAD
5757

58-
pandas.read_csv(filepath_or_buffer=file_path, compression='gzip')
59-
pandas.read_json(file_path, compression='gzip')
60-
pandas.read_sas(file_path, compression='gzip')
61-
pandas.read_stata(filepath_or_buffer=file_path, compression='gzip')
62-
pandas.read_table(file_path, compression='gzip')
63-
pandas.read_xml(path_or_buffer=file_path, compression='gzip')
58+
pandas.read_csv(filepath_or_buffer=file_path,
59+
compression='gzip') # $ result=BAD
60+
pandas.read_json(file_path, compression='gzip') # $ result=BAD
61+
pandas.read_sas(file_path, compression='gzip') # $ result=BAD
62+
pandas.read_stata(filepath_or_buffer=file_path,
63+
compression='gzip') # $ result=BAD
64+
pandas.read_table(file_path, compression='gzip') # $ result=BAD
65+
pandas.read_xml(path_or_buffer=file_path,
66+
compression='gzip') # $ result=BAD
6467

6568
# no compression, so no DoS
66-
pandas.read_table(file_path, compression='tar')
67-
pandas.read_xml(file_path, compression='tar')
68-
69-
pandas.read_csv(filepath_or_buffer=file_path, compression='tar')
70-
pandas.read_json(file_path, compression='tar')
71-
pandas.read_sas(file_path, compression='tar')
72-
pandas.read_stata(filepath_or_buffer=file_path, compression='tar')
73-
pandas.read_table(file_path, compression='tar')
74-
pandas.read_xml(path_or_buffer=file_path, compression='tar')
69+
pandas.read_table(file_path, compression='tar') # $result=OK
70+
pandas.read_xml(file_path, compression='tar') # $result=OK
71+
72+
pandas.read_csv(filepath_or_buffer=file_path,
73+
compression='tar') # $result=OK
74+
pandas.read_json(file_path, compression='tar') # $result=OK
75+
pandas.read_sas(file_path, compression='tar') # $result=OK
76+
pandas.read_stata(filepath_or_buffer=file_path,
77+
compression='tar') # $result=OK
78+
pandas.read_table(file_path, compression='tar') # $result=OK
79+
pandas.read_xml(path_or_buffer=file_path, compression='tar') # $result=OK
7580

7681
return {"message": "bomb"}

0 commit comments

Comments
 (0)