Skip to content

Commit 2492744

Browse files
authored
Merge pull request github#8443 from haby0/py/CsvInjection
Python: Add CSV injection model
2 parents 7a6646d + cd46f31 commit 2492744

File tree

10 files changed

+299
-0
lines changed

10 files changed

+299
-0
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python
2+
# -*- coding: UTF-8 -*-
3+
"""
4+
@Desc :csv injection
5+
"""
6+
import copy
7+
import csv
8+
from flask import Flask
9+
from flask import request
10+
from typing import List
11+
12+
app = Flask(__name__)
13+
14+
@app.route('/bad1')
15+
def bad1():
16+
csv_data = request.args.get('csv')
17+
csvWriter = csv.writer(open("test.csv", "wt"))
18+
csvWriter.writerow(csv_data)
19+
return "bad1"
20+
21+
@app.route('/good1')
22+
def good1():
23+
csv_data = request.args.get('csv')
24+
csvWriter = csv.writer(open("test.csv", "wt"))
25+
csvWriter.writerow(santize_for_csv(csv_data))
26+
return "good1"
27+
28+
def santize_for_csv(data: str| List[str] | List[List[str]]):
29+
def sanitize(item):
30+
return "'" + item
31+
32+
unsafe_prefixes = ("+", "=", "-", "@")
33+
if isinstance(data, str):
34+
if data.startswith(unsafe_prefixes):
35+
return sanitize(data)
36+
return data
37+
elif isinstance(data, list) and isinstance(data[0], str):
38+
sanitized_data = copy.deepcopy(data)
39+
for index, item in enumerate(data):
40+
if item.startswith(unsafe_prefixes):
41+
sanitized_data[index] = sanitize(item)
42+
return sanitized_data
43+
elif isinstance(data[0], list) and isinstance(data[0][0], str):
44+
sanitized_data = copy.deepcopy(data)
45+
for outer_index, sublist in enumerate(data):
46+
for inner_index, item in enumerate(sublist):
47+
if item.startswith(unsafe_prefixes):
48+
sanitized_data[outer_index][inner_index] = sanitize(item)
49+
return sanitized_data
50+
else:
51+
raise ValueError("Unsupported data type: " + str(type(data)))
52+
53+
54+
if __name__ == '__main__':
55+
app.debug = True
56+
app.run()
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>CSV Injection, also known as Formula Injection, occurs when websites embed untrusted input inside CSV files.</p>
7+
<p>When a CSV format file is opened with a spreadsheet program such as Microsoft Excel or LibreOffice Calc.
8+
this software interprets entries beginning with <code>=</code> as formulas, which may attempt information exfiltration
9+
or other malicious activity when automatically executed by the spreadsheet software.</p>
10+
</overview>
11+
<recommendation>
12+
13+
<p>When generating CSV output, ensure that formula-sensitive metacharacters are effectively escaped or removed from all data before storage in the resultant CSV.
14+
Risky characters include <code>=</code>(equal), <code>+</code>(plus), <code>-</code>(minus), and <code>@</code>(at).</p>
15+
16+
</recommendation>
17+
<example>
18+
19+
<p>The following examples show the bad case and the good case respectively.
20+
In <code>bad1</code> method, the data provided by the user is directly stored in the CSV file, which may be attacked.
21+
But in the <code>good1</code> method, the program will check the data provided by the user, and process the data starting with <code>=</code>(equal), <code>+</code>(plus), <code>-</code>(minus), and <code>@</code>(at) characters safely.</p>
22+
23+
<sample src="CsvInjection.py" />
24+
25+
</example>
26+
<references>
27+
<li>OWASP: <a href="https://owasp.org/www-community/attacks/CSV_Injection">CSV Injection</a>.</li>
28+
</references>
29+
</qhelp>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* @name Csv Injection
3+
* @description From user-controlled data saved in CSV files, it is easy to attempt information disclosure
4+
* or other malicious activities when automated by spreadsheet software
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @id py/csv-injection
8+
* @tags security
9+
* external/cwe/cwe-1236
10+
*/
11+
12+
import python
13+
import DataFlow::PathGraph
14+
import semmle.python.dataflow.new.DataFlow
15+
import experimental.semmle.python.security.injection.CsvInjection
16+
17+
from CsvInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
18+
where config.hasFlowPath(source, sink)
19+
select sink.getNode(), source, sink, "Csv injection might include code from $@.", source.getNode(),
20+
"this user input"

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,39 @@ class HeaderDeclaration extends DataFlow::Node {
371371
DataFlow::Node getValueArg() { result = range.getValueArg() }
372372
}
373373

374+
/** Provides classes for modeling Csv writer APIs. */
375+
module CsvWriter {
376+
/**
377+
* A data flow node for csv writer.
378+
*
379+
* Extend this class to model new APIs. If you want to refine existing API models,
380+
* extend `CsvWriter` instead.
381+
*/
382+
abstract class Range extends DataFlow::Node {
383+
/**
384+
* Get the parameter value of the csv writer function.
385+
*/
386+
abstract DataFlow::Node getAnInput();
387+
}
388+
}
389+
390+
/**
391+
* A data flow node for csv writer.
392+
*
393+
* Extend this class to refine existing API models. If you want to model new APIs,
394+
* extend `CsvWriter::Range` instead.
395+
*/
396+
class CsvWriter extends DataFlow::Node {
397+
CsvWriter::Range range;
398+
399+
CsvWriter() { this = range }
400+
401+
/**
402+
* Get the parameter value of the csv writer function.
403+
*/
404+
DataFlow::Node getAnInput() { result = range.getAnInput() }
405+
}
406+
374407
/**
375408
* A data-flow node that sets a cookie in an HTTP response.
376409
*

python/ql/src/experimental/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
99
private import experimental.semmle.python.frameworks.LDAP
1010
private import experimental.semmle.python.frameworks.NoSQL
1111
private import experimental.semmle.python.frameworks.JWT
12+
private import experimental.semmle.python.frameworks.Csv
1213
private import experimental.semmle.python.libraries.PyJWT
1314
private import experimental.semmle.python.libraries.Python_JWT
1415
private import experimental.semmle.python.libraries.Authlib
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `csv` PyPI package.
3+
* See https://docs.python.org/3/library/csv.html
4+
*/
5+
6+
private import python
7+
private import semmle.python.ApiGraphs
8+
private import experimental.semmle.python.Concepts
9+
10+
/**
11+
* Provides models for the `csv` PyPI package.
12+
*
13+
* See
14+
* - https://docs.python.org/3/library/csv.html
15+
*/
16+
private module Csv {
17+
private module Writer {
18+
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
19+
20+
/** A direct instantiation of `csv.writer` or `csv.DictWriter`. */
21+
private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
22+
ClassInstantiation() {
23+
this = API::moduleImport("csv").getMember(["writer", "DictWriter"]).getACall()
24+
}
25+
}
26+
27+
/** Gets a reference to an `csv.writer` or `csv.DictWriter` instance. */
28+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
29+
t.start() and
30+
result instanceof InstanceSource
31+
or
32+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
33+
}
34+
35+
/** Gets a reference to an `csv.writer` or `csv.DictWriter` instance. */
36+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
37+
38+
/**
39+
* See:
40+
* - https://docs.python.org/3/library/csv.html#csvwriter.writerow
41+
* - https://docs.python.org/3/library/csv.html#csvwriter.writerows
42+
*/
43+
private class CsvWriteCall extends CsvWriter::Range, DataFlow::CallCfgNode {
44+
string methodName;
45+
46+
CsvWriteCall() {
47+
methodName in ["writerow", "writerows"] and
48+
this.(DataFlow::MethodCallNode).calls(Writer::instance(), methodName)
49+
}
50+
51+
override DataFlow::Node getAnInput() {
52+
result = this.getArg(0)
53+
or
54+
methodName = "writerow" and
55+
result = this.getArgByName("row")
56+
or
57+
methodName = "writerows" and
58+
result = this.getArgByName("rows")
59+
}
60+
}
61+
62+
/**
63+
* See: https://docs.python.org/3/library/csv.html#csv.DictWriter
64+
*/
65+
private class DictWriterInstance extends CsvWriter::Range, DataFlow::CallCfgNode {
66+
DictWriterInstance() { this = API::moduleImport("csv").getMember("DictWriter").getACall() }
67+
68+
override DataFlow::Node getAnInput() {
69+
result in [this.getArg(1), this.getArgByName("fieldnames")]
70+
}
71+
}
72+
}
73+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import python
2+
import experimental.semmle.python.Concepts
3+
import semmle.python.dataflow.new.DataFlow
4+
import semmle.python.dataflow.new.TaintTracking
5+
import semmle.python.dataflow.new.BarrierGuards
6+
import semmle.python.dataflow.new.RemoteFlowSources
7+
8+
/**
9+
* A taint-tracking configuration for tracking untrusted user input used in file read.
10+
*/
11+
class CsvInjectionFlowConfig extends TaintTracking::Configuration {
12+
CsvInjectionFlowConfig() { this = "CsvInjectionFlowConfig" }
13+
14+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
15+
16+
override predicate isSink(DataFlow::Node sink) { sink = any(CsvWriter cw).getAnInput() }
17+
18+
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
19+
guard instanceof StartsWithCheck or
20+
guard instanceof StringConstCompare
21+
}
22+
}
23+
24+
private class StartsWithCheck extends DataFlow::BarrierGuard {
25+
DataFlow::MethodCallNode mc;
26+
27+
StartsWithCheck() {
28+
this = mc.asCfgNode() and
29+
mc.calls(_, "startswith")
30+
}
31+
32+
override predicate checks(ControlFlowNode node, boolean branch) {
33+
node = mc.getObject().asCfgNode() and
34+
branch = true
35+
}
36+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
edges
2+
| csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute |
3+
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data |
4+
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data |
5+
| csv_bad.py:24:16:24:22 | ControlFlowNode for request | csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute |
6+
| csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute | csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data |
7+
nodes
8+
| csv_bad.py:16:16:16:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
9+
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
10+
| csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
11+
| csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
12+
| csv_bad.py:24:16:24:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
13+
| csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
14+
| csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
15+
subpaths
16+
#select
17+
| csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:16:16:16:22 | ControlFlowNode for request | this user input |
18+
| csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:16:16:16:22 | ControlFlowNode for request | this user input |
19+
| csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | csv_bad.py:24:16:24:22 | ControlFlowNode for request | csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:24:16:24:22 | ControlFlowNode for request | this user input |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/Security/CWE-1236/CsvInjection.ql
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/env python
2+
# -*- coding: UTF-8 -*-
3+
"""
4+
@Desc :csv injection
5+
"""
6+
import copy
7+
import csv
8+
from flask import Flask
9+
from flask import request
10+
from typing import List
11+
12+
app = Flask(__name__)
13+
14+
@app.route('/bad1')
15+
def bad1():
16+
csv_data = request.args.get('csv')
17+
csvWriter = csv.writer(open("test.csv", "wt"))
18+
csvWriter.writerow(csv_data) # bad
19+
csvWriter.writerows(csv_data) # bad
20+
return "bad1"
21+
22+
@app.route('/bad2')
23+
def bad2():
24+
csv_data = request.args.get('csv')
25+
csvWriter = csv.DictWriter(f, fieldnames=csv_data) # bad
26+
csvWriter.writeheader()
27+
return "bad2"
28+
29+
if __name__ == '__main__':
30+
app.debug = True
31+
app.run()

0 commit comments

Comments
 (0)