Skip to content

Commit 1f767b8

Browse files
Sim4n6yoff
authored andcommitted
Add some comments and docs
1 parent 5cc9170 commit 1f767b8

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

python/ql/src/experimental/Security/CWE-770/UnicodeDoS.ql

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import semmle.python.dataflow.new.TaintTracking
1717
import semmle.python.dataflow.new.internal.DataFlowPublic
1818
import semmle.python.dataflow.new.RemoteFlowSources
1919

20+
// The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize
21+
// and textnorm modules. The use of argIdx is to constraint the argument being normalized.
2022
class UnicodeCompatibilityNormalize extends API::CallNode {
2123
int argIdx;
2224

@@ -66,10 +68,12 @@ predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branc
6668
exists(API::CallNode lenCall, Cmpop op_gt, Cmpop op_lt, Node n |
6769
lenCall = n.getALocalSource() and
6870
(
71+
// arg <= LIMIT OR arg < LIMIT
6972
(op_lt = any(LtE lte) or op_lt = any(Lt lt)) and
7073
branch = true and
7174
cn.operands(n.asCfgNode(), op_lt, _)
7275
or
76+
// LIMIT >= arg OR LIMIT > arg
7377
(op_gt = any(GtE gte) or op_gt = any(Gt gt)) and
7478
branch = true and
7579
cn.operands(_, op_gt, n.asCfgNode())
@@ -88,12 +92,16 @@ class Configuration extends TaintTracking::Configuration {
8892
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
8993

9094
override predicate isSanitizer(DataFlow::Node sanitizer) {
95+
// underAValue is a check to ensure that the length of the user-provided value is limited to a certain amount
9196
sanitizer = DataFlow::BarrierGuard<underAValue/3>::getABarrierNode()
9297
}
9398

9499
override predicate isSink(DataFlow::Node sink) {
100+
// Any call to the Unicode compatibility normalization is a costly operation
95101
sink = any(UnicodeCompatibilityNormalize ucn).getPathArg()
96102
or
103+
// The call to secure_filename() from pallets/werkzeug uses the Unicode compatibility normalization
104+
// under the hood, https://github.com/pallets/werkzeug/blob/d3dd65a27388fbd39d146caacf2563639ba622f0/src/werkzeug/utils.py#L218
97105
sink = API::moduleImport("werkzeug").getMember("secure_filename").getACall().getArg(_)
98106
or
99107
sink =
@@ -102,7 +110,6 @@ class Configuration extends TaintTracking::Configuration {
102110
.getMember("secure_filename")
103111
.getACall()
104112
.getArg(_)
105-
106113
}
107114
}
108115

0 commit comments

Comments
 (0)