19
19
*/
20
20
package org .sonar .python .checks .regex ;
21
21
22
- import java .util .Arrays ;
22
+ import java .util .Collections ;
23
+ import java .util .HashMap ;
23
24
import java .util .HashSet ;
24
25
import java .util .List ;
26
+ import java .util .Map ;
25
27
import java .util .Optional ;
26
28
import java .util .Set ;
29
+ import java .util .regex .Pattern ;
27
30
import javax .annotation .Nullable ;
28
31
import org .sonar .plugins .python .api .PythonSubscriptionCheck ;
29
32
import org .sonar .plugins .python .api .SubscriptionContext ;
30
33
import org .sonar .plugins .python .api .symbols .Symbol ;
34
+ import org .sonar .plugins .python .api .tree .BinaryExpression ;
31
35
import org .sonar .plugins .python .api .tree .CallExpression ;
32
36
import org .sonar .plugins .python .api .tree .Expression ;
37
+ import org .sonar .plugins .python .api .tree .QualifiedExpression ;
33
38
import org .sonar .plugins .python .api .tree .RegularArgument ;
34
39
import org .sonar .plugins .python .api .tree .StringLiteral ;
35
40
import org .sonar .plugins .python .api .tree .Tree ;
38
43
import org .sonar .python .tree .TreeUtils ;
39
44
import org .sonarsource .analyzer .commons .regex .RegexIssueLocation ;
40
45
import org .sonarsource .analyzer .commons .regex .RegexParseResult ;
46
+ import org .sonarsource .analyzer .commons .regex .ast .FlagSet ;
41
47
import org .sonarsource .analyzer .commons .regex .ast .RegexSyntaxElement ;
42
48
43
49
public abstract class AbstractRegexCheck extends PythonSubscriptionCheck {
44
50
45
- private static final Set <String > REGEX_FUNCTIONS = new HashSet <>(Arrays .asList ("re.sub" , "re.subn" , "re.compile" , "re.search" , "re.match" ,
46
- "re.fullmatch" , "re.split" , "re.findall" , "re.finditer" ));
51
/**
 * Maps the fully qualified name of each supported {@code re} function to the position of its {@code flags}
 * parameter. The position is the index handed to {@code TreeUtils.nthArgumentOrKeyword}; a {@code null} value
 * means the function has no flags parameter.
 */
private static final Map<String, Integer> REGEX_FUNCTIONS_TO_FLAG_PARAM;

static {
  Map<String, Integer> flagPositions = new HashMap<>();
  flagPositions.put("re.sub", 4);
  flagPositions.put("re.subn", 4);
  // re.compile(pattern, flags=0): flags is the second parameter. Mapping it to null made the check
  // ignore every flag passed to re.compile.
  flagPositions.put("re.compile", 1);
  flagPositions.put("re.search", 2);
  flagPositions.put("re.match", 2);
  flagPositions.put("re.fullmatch", 2);
  flagPositions.put("re.split", 3);
  flagPositions.put("re.findall", 2);
  flagPositions.put("re.finditer", 2);
  // The map is shared static state handed out by lookedUpFunctions(); publish it read-only.
  REGEX_FUNCTIONS_TO_FLAG_PARAM = Collections.unmodifiableMap(flagPositions);
}
64
+
47
65
protected RegexContext regexContext ;
48
66
49
67
// We want to report only one issue per element for one rule.
50
68
protected final Set <RegexSyntaxElement > reportedRegexTrees = new HashSet <>();
51
69
52
- protected Set <String > lookedUpFunctionNames () {
53
- return REGEX_FUNCTIONS ;
70
+ /**
71
+ * Should return a map whose keys are the functions the check is interested in, and the values are the position of the flags parameter.
72
+ * Set the position of the flags parameter to {@code null} if there is none.
73
+ */
74
+ protected Map <String , Integer > lookedUpFunctions () {
75
+ return REGEX_FUNCTIONS_TO_FLAG_PARAM ;
54
76
}
55
77
56
78
@ Override
@@ -67,17 +89,20 @@ private void checkCall(SubscriptionContext ctx) {
67
89
if (calleeSymbol == null || calleeSymbol .fullyQualifiedName () == null ) {
68
90
return ;
69
91
}
70
- if (lookedUpFunctionNames ().contains (calleeSymbol .fullyQualifiedName ())) {
92
+ String functionFqn = calleeSymbol .fullyQualifiedName ();
93
+ if (functionFqn != null && lookedUpFunctions ().containsKey (functionFqn )) {
94
+ FlagSet flagSet = getFlagSet (callExpression , functionFqn );
95
+
71
96
patternArgStringLiteral (callExpression )
72
- .flatMap (this :: regexForStringLiteral )
97
+ .flatMap (l -> regexForStringLiteral ( l , flagSet ) )
73
98
.ifPresent (parseResult -> checkRegex (parseResult , callExpression ));
74
99
}
75
100
}
76
101
77
- private Optional <RegexParseResult > regexForStringLiteral (StringLiteral literal ) {
102
+ private Optional <RegexParseResult > regexForStringLiteral (StringLiteral literal , FlagSet flagSet ) {
78
103
// TODO: for now we only handle strings with an "r" prefix. This will be extended.
79
104
if (literal .stringElements ().size () == 1 && "r" .equalsIgnoreCase (literal .stringElements ().get (0 ).prefix ())) {
80
- return Optional .of (regexContext .regexForStringElement (literal .stringElements ().get (0 )));
105
+ return Optional .of (regexContext .regexForStringElement (literal .stringElements ().get (0 ), flagSet ));
81
106
}
82
107
return Optional .empty ();
83
108
}
@@ -94,6 +119,94 @@ private static Optional<StringLiteral> patternArgStringLiteral(CallExpression re
94
119
return Optional .empty ();
95
120
}
96
121
122
+ private FlagSet getFlagSet (CallExpression callExpression , String functionFqn ) {
123
+ HashSet <QualifiedExpression > flags = new HashSet <>();
124
+ getFlagsArgValue (callExpression , lookedUpFunctions ().get (functionFqn )).ifPresent (f -> flags .addAll (extractFlagExpressions (f )));
125
+ FlagSet flagSet = new FlagSet ();
126
+ flags .stream ()
127
+ .map (AbstractRegexCheck ::mapPythonFlag )
128
+ .filter (Optional ::isPresent )
129
+ .map (Optional ::get )
130
+ .forEach (flagSet ::add );
131
+
132
+ // TODO: Don't do this when PYTHON_VERSION is 2
133
+ // We used Pattern.LITERAL to represent re.ASCII. So we are checking if re.ASCII is set here.
134
+ // For python3 matches are Unicode by default, and re.ASCII can be used to deactivate that.
135
+ if (!flagSet .contains (Pattern .LITERAL )) {
136
+ flagSet .add (Pattern .UNICODE_CHARACTER_CLASS );
137
+ flagSet .add (Pattern .UNICODE_CASE );
138
+ }
139
+ flagSet .removeAll (new FlagSet (Pattern .LITERAL ));
140
+
141
+ return flagSet ;
142
+ }
143
+
144
+ private static Optional <Expression > getFlagsArgValue (CallExpression regexFunctionCall , @ Nullable Integer argPosition ) {
145
+ if (argPosition == null ) {
146
+ return Optional .empty ();
147
+ }
148
+ RegularArgument patternArgument = TreeUtils .nthArgumentOrKeyword (argPosition , "flags" , regexFunctionCall .arguments ());
149
+ return patternArgument != null ? Optional .of (patternArgument .expression ()) : Optional .empty ();
150
+ }
151
+
152
+ private static HashSet <QualifiedExpression > extractFlagExpressions (Tree flagsSubexpr ) {
153
+ if (flagsSubexpr .is (Tree .Kind .QUALIFIED_EXPR )) {
154
+ return new HashSet <>(Collections .singletonList ((QualifiedExpression ) flagsSubexpr ));
155
+ } else if (flagsSubexpr .is (Tree .Kind .BITWISE_OR )) {
156
+ // recurse into left and right branch
157
+ BinaryExpression orExpr = (BinaryExpression ) flagsSubexpr ;
158
+ HashSet <QualifiedExpression > flags = extractFlagExpressions (orExpr .leftOperand ());
159
+ flags .addAll (extractFlagExpressions (orExpr .rightOperand ()));
160
+ return flags ;
161
+ } else {
162
+ // failed to interpret. Ignore leaf.
163
+ return new HashSet <>();
164
+ }
165
+ }
166
+
167
+ public static Optional <Integer > mapPythonFlag (QualifiedExpression ch ) {
168
+ Symbol symbol = ch .symbol ();
169
+ if (symbol == null ) {
170
+ return Optional .empty ();
171
+ }
172
+ String symbolFqn = symbol .fullyQualifiedName ();
173
+ if (symbolFqn == null ) {
174
+ return Optional .empty ();
175
+ }
176
+
177
+ Integer result ;
178
+ switch (symbolFqn ) {
179
+ case "re.IGNORECASE" :
180
+ case "re.I" :
181
+ result = Pattern .CASE_INSENSITIVE ;
182
+ break ;
183
+ case "re.MULTILINE" :
184
+ case "re.M" :
185
+ result = Pattern .MULTILINE ;
186
+ break ;
187
+ case "re.DOTALL" :
188
+ case "re.S" :
189
+ result = Pattern .DOTALL ;
190
+ break ;
191
+ case "re.VERBOSE" :
192
+ case "re.X" :
193
+ result = Pattern .COMMENTS ;
194
+ break ;
195
+ case "re.UNICODE" :
196
+ case "re.U" :
197
+ result = Pattern .UNICODE_CHARACTER_CLASS ;
198
+ break ;
199
+ case "re.ASCII" :
200
+ case "re.A" :
201
+ // We misuse Pattern.LITERAL to represent re.ASCII. It will be removed before being provided to the parser.
202
+ result = Pattern .LITERAL ;
203
+ break ;
204
+ default :
205
+ result = null ;
206
+ }
207
+ return Optional .ofNullable (result );
208
+ }
209
+
97
210
public void addIssue (RegexSyntaxElement regexTree , String message , @ Nullable Integer cost , List <RegexIssueLocation > secondaries ) {
98
211
if (reportedRegexTrees .add (regexTree )) {
99
212
PreciseIssue issue = regexContext .addIssue (regexTree , message );
0 commit comments