Skip to content

Commit 716e412

Browse files
author
Jeremi Do Dinh
authored
SONARPY-1437 Rule S6714: Passing a list to np.array should be preferred over passing a generator. (#1573)
1 parent e82c423 commit 716e412

File tree

7 files changed

+241
-0
lines changed

7 files changed

+241
-0
lines changed

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ public static Iterable<Class> getChecks() {
271271
NotDiscoverableTestMethodCheck.class,
272272
NotImplementedErrorInOperatorMethodsCheck.class,
273273
NumpyIsNanCheck.class,
274+
NumpyListOverGeneratorCheck.class,
274275
NumpyRandomSeedCheck.class,
275276
NumpyRandomStateCheck.class,
276277
NumpyWhereOneConditionCheck.class,
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2023 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks;
21+
22+
import java.util.List;
23+
import java.util.Optional;
24+
import java.util.Set;
25+
import org.sonar.check.Rule;
26+
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
27+
import org.sonar.plugins.python.api.SubscriptionContext;
28+
import org.sonar.plugins.python.api.symbols.Symbol;
29+
import org.sonar.plugins.python.api.tree.Argument;
30+
import org.sonar.plugins.python.api.tree.CallExpression;
31+
import org.sonar.plugins.python.api.tree.Expression;
32+
import org.sonar.plugins.python.api.tree.HasSymbol;
33+
import org.sonar.plugins.python.api.tree.Name;
34+
import org.sonar.plugins.python.api.tree.RegularArgument;
35+
import org.sonar.plugins.python.api.tree.Tree;
36+
import org.sonar.python.cfg.fixpoint.ReachingDefinitionsAnalysis;
37+
import org.sonar.python.tree.TreeUtils;
38+
39+
@Rule(key = "S6714")
40+
public class NumpyListOverGeneratorCheck extends PythonSubscriptionCheck {
41+
42+
public static final String MESSAGE = "Pass a list to \"np.array\" instead of passing a generator.";
43+
private ReachingDefinitionsAnalysis reachingDefinitionsAnalysis;
44+
45+
@Override
46+
public void initialize(Context context) {
47+
context.registerSyntaxNodeConsumer(Tree.Kind.FILE_INPUT, ctx -> reachingDefinitionsAnalysis = new ReachingDefinitionsAnalysis((ctx.pythonFile())));
48+
context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, this::checkNumpyArrayCall);
49+
}
50+
51+
private void checkNumpyArrayCall(SubscriptionContext ctx) {
52+
CallExpression call = (CallExpression) ctx.syntaxNode();
53+
Optional.ofNullable(call.calleeSymbol())
54+
.map(Symbol::fullyQualifiedName)
55+
.filter("numpy.array"::equals)
56+
.ifPresent(fqn -> checkGeneratorCallee(call, ctx));
57+
}
58+
59+
private void checkGeneratorCallee(CallExpression call, SubscriptionContext ctx) {
60+
List<Argument> argList = call.arguments();
61+
if (argList.isEmpty()) {
62+
return;
63+
}
64+
65+
if (Optional.of(argList.get(0))
66+
.filter(arg -> arg.is(Tree.Kind.REGULAR_ARGUMENT))
67+
.map(RegularArgument.class::cast)
68+
.map(RegularArgument::expression)
69+
.filter(regArg -> (regArg.is(Tree.Kind.GENERATOR_EXPR) || this.isNamedGeneratorExpression(regArg)))
70+
.isEmpty()) {
71+
return;
72+
}
73+
74+
if (Optional.ofNullable(TreeUtils.nthArgumentOrKeyword(1, "dtype", argList))
75+
.filter(regArg -> regArg.expression().is(Tree.Kind.NAME))
76+
.map(regArg -> (Name) regArg.expression())
77+
.map(HasSymbol::symbol)
78+
.map(Symbol::fullyQualifiedName)
79+
.filter("object"::equals)
80+
.isEmpty()) {
81+
ctx.addIssue(call, MESSAGE);
82+
}
83+
}
84+
85+
private boolean isNamedGeneratorExpression(Expression expression) {
86+
return Optional.of(expression)
87+
.flatMap(TreeUtils.toOptionalInstanceOfMapper(Name.class))
88+
.map(name -> this.reachingDefinitionsAnalysis.valuesAtLocation(name))
89+
.filter(NumpyListOverGeneratorCheck::checkSetProperties)
90+
.isPresent();
91+
}
92+
93+
private static boolean checkSetProperties(Set<Expression> set) {
94+
return !set.isEmpty() && set.stream().allMatch(NumpyListOverGeneratorCheck::isGeneratorAndParentHasType);
95+
}
96+
97+
private static boolean isGeneratorAndParentHasType(Expression expression) {
98+
return expression.is(Tree.Kind.GENERATOR_EXPR) && !expression.parent().is(Tree.Kind.ANNOTATED_ASSIGNMENT);
99+
}
100+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<p>This rule raises an issue when a generator is passed to <code>np.array</code>.</p>
2+
<h2>Why is this an issue?</h2>
3+
<p>The creation of a NumPy array can be done in several ways, for example by passing a Python list to the <code>np.array</code> function. Another way
4+
would be to pass a generator to the <code>np.array</code> function, but doing so creates a 0-dimensional array of objects and may not be the intended
5+
goal. This NumPy array will have a have a data type (dtype) of <code>object</code> and could hold any Python objects.</p>
6+
<p>One of the characteristics of NumPy arrays is homogeneity, meaning all its elements are of the same type. Creating an array of objects allows the
7+
user to create heterogeneous array without raising any errors and creating such an array can lead to bugs further in the program.</p>
8+
<pre data-diff-id="1" data-diff-type="noncompliant">
9+
arr = np.array(x**2 for x in range(10))
10+
11+
arr.reshape(1)
12+
arr.resize(2)
13+
arr.put(indices=1, values=3) # No issues raised.
14+
</pre>
15+
<p>The NumPy array <code>arr</code> shown above now holds 2 values: a generator and the number 3.</p>
16+
<h2>How to fix it</h2>
17+
<p>To fix this issue, either:</p>
18+
<ul>
19+
<li> pass a Python list instead of a generator to the <code>np.array</code> function or, </li>
20+
<li> explicitly show the intention to create a 0-dimensional array of objects by either adding <code>Any</code> as the type hint of the generator or
21+
by specifying the <code>dtype</code> parameter of the NumPy array as <code>object</code>. </li>
22+
</ul>
23+
<h3>Code examples</h3>
24+
<h4>Noncompliant code example</h4>
25+
<pre data-diff-id="2" data-diff-type="noncompliant">
26+
arr = np.array(x**2 for x in range(10)) # Noncompliant: the resulting array will be of the data type: object.
27+
28+
gen = (x*2 for x in range(5))
29+
arr = np.array(gen) # Noncompliant: the resulting array will be of the data type: object.
30+
</pre>
31+
<h4>Compliant solution</h4>
32+
<pre data-diff-id="2" data-diff-type="compliant">
33+
from typing import Any
34+
35+
arr = np.array([x**2 for x in range(10)]) # Compliant: a list of 10 elements is passed to the np.array function.
36+
37+
arr = np.array(x**2 for x in range(10), dtype=object) # Compliant: the dtype parameter of np.array is set to object.
38+
39+
gen: Any = (x*2 for x in range(5))
40+
arr = np.array(gen) # Compliant: the generator is explicitly type hinted with Any.
41+
</pre>
42+
<h2>Resources</h2>
43+
<h3>Documentation</h3>
44+
<ul>
45+
<li> NumPy Documentation - <a href="https://numpy.org/doc/stable/reference/typing.html#arraylike">ArrayLike</a> </li>
46+
</ul>
47+
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"title": "Passing a list to np.array should be preferred over passing a generator",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"numpy",
11+
"data-science"
12+
],
13+
"defaultSeverity": "Major",
14+
"ruleSpecification": "RSPEC-6714",
15+
"sqKey": "S6714",
16+
"scope": "All",
17+
"quickfix": "unknown",
18+
"code": {
19+
"impacts": {
20+
"MAINTAINABILITY": "MEDIUM",
21+
"RELIABILITY": "LOW"
22+
},
23+
"attribute": "LOGICAL"
24+
}
25+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@
206206
"S6663",
207207
"S6709",
208208
"S6711",
209+
"S6714",
209210
"S6725",
210211
"S6727",
211212
"S6729",
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2023 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks;
21+
22+
import org.junit.jupiter.api.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
class NumpyListOverGeneratorCheckTest {
26+
@Test
27+
void test() {
28+
PythonCheckVerifier.verify("src/test/resources/checks/numpyListOverGenerator.py", new NumpyListOverGeneratorCheck());
29+
}
30+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Any
2+
3+
4+
def import_1():
5+
import numpy as np
6+
from typing import Any
7+
8+
gen_1 = (x * 2 for x in range(5))
9+
arr = np.array(gen_1) # Noncompliant {{Pass a list to "np.array" instead of passing a generator.}}
10+
# ^^^^^^^^^^^^^^^
11+
gen_2: Any = (x * 2 for x in range(5))
12+
arr = np.array(gen_2)
13+
14+
gen_3: str = "Hello World"
15+
gen_3 = (x * 2 for x in range(5))
16+
arr = np.array(gen_3) # Noncompliant
17+
18+
gen_4 = 42
19+
np.array(gen_4)
20+
21+
gen_5: MyClass = (x * 2 for x in range(5))
22+
np.array(gen_5)
23+
24+
def test(xx):
25+
np.array(xx)
26+
np.array(xx, gen_1)
27+
28+
arr = np.array((x ** 2 for x in range(10)), dtype=object) # Compliant: the dtype parameter of np.array is set to object.
29+
30+
arr = np.array(x ** 2 for x in range(10)) # Noncompliant {{Pass a list to "np.array" instead of passing a generator.}}
31+
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
32+
33+
arr = np.array([x ** 2 for x in range(10)]) # Compliant: a list of 10 elements is passed to the np.array function.
34+
35+
arr = np.array((x ** 2 for x in range(10)), dtype=str) # Noncompliant
36+
37+
arr = np.array()

0 commit comments

Comments
 (0)