@@ -17,6 +17,8 @@ import semmle.python.dataflow.new.TaintTracking
17
17
import semmle.python.dataflow.new.internal.DataFlowPublic
18
18
import semmle.python.dataflow.new.RemoteFlowSources
19
19
20
+ // The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize
21
+ // and textnorm modules. argIdx is used to constrain which argument is the one being normalized.
20
22
class UnicodeCompatibilityNormalize extends API:: CallNode {
21
23
int argIdx ;
22
24
@@ -66,10 +68,12 @@ predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branc
66
68
exists ( API:: CallNode lenCall , Cmpop op_gt , Cmpop op_lt , Node n |
67
69
lenCall = n .getALocalSource ( ) and
68
70
(
71
+ // arg <= LIMIT OR arg < LIMIT
69
72
( op_lt = any ( LtE lte ) or op_lt = any ( Lt lt ) ) and
70
73
branch = true and
71
74
cn .operands ( n .asCfgNode ( ) , op_lt , _)
72
75
or
76
+ // LIMIT >= arg OR LIMIT > arg
73
77
( op_gt = any ( GtE gte ) or op_gt = any ( Gt gt ) ) and
74
78
branch = true and
75
79
cn .operands ( _, op_gt , n .asCfgNode ( ) )
@@ -88,12 +92,16 @@ class Configuration extends TaintTracking::Configuration {
88
92
// Sources: any data-flow node that is a RemoteFlowSource.
override predicate isSource ( DataFlow:: Node source ) { source instanceof RemoteFlowSource }
89
93
90
94
override predicate isSanitizer ( DataFlow:: Node sanitizer ) {
95
+ // Barrier nodes are those guarded by underAValue, a check that the length of the user-provided value is bounded by a limit
91
96
sanitizer = DataFlow:: BarrierGuard< underAValue / 3 > :: getABarrierNode ( )
92
97
}
93
98
94
99
override predicate isSink ( DataFlow:: Node sink ) {
100
+ // Any call to the Unicode compatibility normalization is a costly operation
95
101
sink = any ( UnicodeCompatibilityNormalize ucn ) .getPathArg ( )
96
102
or
103
+ // The call to secure_filename() from pallets/werkzeug uses the Unicode compatibility normalization
104
+ // under the hood; see https://github.com/pallets/werkzeug/blob/d3dd65a27388fbd39d146caacf2563639ba622f0/src/werkzeug/utils.py#L218
97
105
sink = API:: moduleImport ( "werkzeug" ) .getMember ( "secure_filename" ) .getACall ( ) .getArg ( _)
98
106
or
99
107
sink =
@@ -102,7 +110,6 @@ class Configuration extends TaintTracking::Configuration {
102
110
.getMember ( "secure_filename" )
103
111
.getACall ( )
104
112
.getArg ( _)
105
-
106
113
}
107
114
}
108
115
0 commit comments