1
1
import python
2
- import semmle.python.objects.ObjectInternal
2
+ deprecated import semmle.python.objects.ObjectInternal as OI
3
+ private import semmle.python.ApiGraphs
3
4
4
- private predicate re_module_function ( string name , int flags ) {
5
- name = "compile" and flags = 1
5
+ /**
6
+ * Gets the positional argument index containing the regular expression flags for the member of the
7
+ * `re` module with the name `name`.
8
+ */
9
+ private int re_member_flags_arg ( string name ) {
10
+ name = "compile" and result = 1
6
11
or
7
- name = "search" and flags = 2
12
+ name = "search" and result = 2
8
13
or
9
- name = "match" and flags = 2
14
+ name = "match" and result = 2
10
15
or
11
- name = "split" and flags = 3
16
+ name = "split" and result = 3
12
17
or
13
- name = "findall" and flags = 2
18
+ name = "findall" and result = 2
14
19
or
15
- name = "finditer" and flags = 2
20
+ name = "finditer" and result = 2
16
21
or
17
- name = "sub" and flags = 4
22
+ name = "sub" and result = 4
18
23
or
19
- name = "subn" and flags = 4
24
+ name = "subn" and result = 4
20
25
}
21
26
22
27
/**
23
- * Gets the names and corresponding values of attributes of the `re` module that are likely to be
28
+ * Gets the names and corresponding API nodes of members of the `re` module that are likely to be
24
29
* methods taking regular expressions as arguments.
25
30
*
26
31
* This is a helper predicate that fixes a bad join order, and should not be inlined without checking
27
32
* that this is safe.
28
33
*/
29
34
pragma [ nomagic]
30
- private Value relevant_re_attr ( string name ) {
31
- result = Module :: named ( "re" ) .attr ( name ) and
35
+ private API :: Node relevant_re_member ( string name ) {
36
+ result = API :: moduleImport ( "re" ) .getMember ( name ) and
32
37
name != "escape"
33
38
}
34
39
@@ -39,24 +44,78 @@ private Value relevant_re_attr(string name) {
39
44
predicate used_as_regex ( Expr s , string mode ) {
40
45
( s instanceof Bytes or s instanceof Unicode ) and
41
46
/* Call to re.xxx(regex, ... [mode]) */
42
- exists ( CallNode call , string name |
43
- call .getArg ( 0 ) .pointsTo ( _ , _ , s . getAFlowNode ( ) ) and
44
- call . getFunction ( ) . pointsTo ( relevant_re_attr ( name ) )
47
+ exists ( DataFlow :: CallCfgNode call , string name |
48
+ call .getArg ( 0 ) .asExpr ( ) = s and
49
+ call = relevant_re_member ( name ) . getACall ( )
45
50
|
46
51
mode = "None"
47
52
or
48
- exists ( Value obj | mode = mode_from_mode_object ( obj ) |
49
- exists ( int flags_arg |
50
- re_module_function ( name , flags_arg ) and
51
- call .getArg ( flags_arg ) .pointsTo ( obj )
52
- )
53
- or
54
- call .getArgByName ( "flags" ) .pointsTo ( obj )
53
+ mode = mode_from_node ( [ call .getArg ( re_member_flags_arg ( name ) ) , call .getArgByName ( "flags" ) ] )
54
+ )
55
+ }
56
+
57
+ /**
58
+ * Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
59
+ * that have multiple names, we pick the long-form name as a canonical representative.
60
+ */
61
+ private string canonical_name ( API:: Node flag ) {
62
+ result in [ "ASCII" , "IGNORECASE" , "LOCALE" , "UNICODE" , "MULTILINE" , "TEMPLATE" ] and
63
+ flag = API:: moduleImport ( "re" ) .getMember ( [ result , result .prefix ( 1 ) ] )
64
+ or
65
+ flag = API:: moduleImport ( "re" ) .getMember ( [ "DOTALL" , "S" ] ) and result = "DOTALL"
66
+ or
67
+ flag = API:: moduleImport ( "re" ) .getMember ( [ "VERBOSE" , "X" ] ) and result = "VERBOSE"
68
+ }
69
+
70
+ /**
71
+ * A type tracker for regular expression flag names. Holds if the result is a node that may refer
72
+ * to the `re` flag with the canonical name `flag_name`
73
+ */
74
+ private DataFlow:: LocalSourceNode re_flag_tracker ( string flag_name , DataFlow:: TypeTracker t ) {
75
+ t .start ( ) and
76
+ exists ( API:: Node flag | flag_name = canonical_name ( flag ) and result = flag .getAUse ( ) )
77
+ or
78
+ exists ( BinaryExprNode binop , DataFlow:: Node operand |
79
+ operand .getALocalSource ( ) = re_flag_tracker ( flag_name , t .continue ( ) ) and
80
+ operand .asCfgNode ( ) = binop .getAnOperand ( ) and
81
+ ( binop .getOp ( ) instanceof BitOr or binop .getOp ( ) instanceof Add ) and
82
+ result .asCfgNode ( ) = binop
83
+ )
84
+ or
85
+ // Due to bad performance when using normal setup with `re_flag_tracker(t2, attr_name).track(t2, t)`
86
+ // we have inlined that code and forced a join
87
+ exists ( DataFlow:: TypeTracker t2 |
88
+ exists ( DataFlow:: StepSummary summary |
89
+ re_flag_tracker_first_join ( t2 , flag_name , result , summary ) and
90
+ t = t2 .append ( summary )
55
91
)
56
92
)
57
93
}
58
94
59
- string mode_from_mode_object ( Value obj ) {
95
+ pragma [ nomagic]
96
+ private predicate re_flag_tracker_first_join (
97
+ DataFlow:: TypeTracker t2 , string flag_name , DataFlow:: Node res , DataFlow:: StepSummary summary
98
+ ) {
99
+ DataFlow:: StepSummary:: step ( re_flag_tracker ( flag_name , t2 ) , res , summary )
100
+ }
101
+
102
+ /**
103
+ * A type tracker for regular expression flag names. Holds if the result is a node that may refer
104
+ * to the `re` flag with the canonical name `flag_name`
105
+ */
106
+ private DataFlow:: Node re_flag_tracker ( string flag_name ) {
107
+ re_flag_tracker ( flag_name , DataFlow:: TypeTracker:: end ( ) ) .flowsTo ( result )
108
+ }
109
+
110
+ /** Gets a regular expression mode flag associated with the given data flow node. */
111
+ string mode_from_node ( DataFlow:: Node node ) { node = re_flag_tracker ( result ) }
112
+
113
+ /**
114
+ * DEPRECATED 2021-02-24 -- use `mode_from_node` instead.
115
+ *
116
+ * Gets a regular expression mode flag associated with the given value.
117
+ */
118
+ deprecated string mode_from_mode_object ( Value obj ) {
60
119
(
61
120
result = "DEBUG" or
62
121
result = "IGNORECASE" or
@@ -67,8 +126,8 @@ string mode_from_mode_object(Value obj) {
67
126
result = "VERBOSE"
68
127
) and
69
128
exists ( int flag |
70
- flag = Value:: named ( "sre_constants.SRE_FLAG_" + result ) .( ObjectInternal ) .intValue ( ) and
71
- obj .( ObjectInternal ) .intValue ( ) .bitAnd ( flag ) = flag
129
+ flag = Value:: named ( "sre_constants.SRE_FLAG_" + result ) .( OI :: ObjectInternal ) .intValue ( ) and
130
+ obj .( OI :: ObjectInternal ) .intValue ( ) .bitAnd ( flag ) = flag
72
131
)
73
132
}
74
133
0 commit comments