Skip to content

Commit 16b22de

Browse files
authored
Merge pull request #71 from cleophass/GCI98-python
GCI96 PandasRequireUsecolsArgument #Python #DLG #Build
2 parents e346945 + 191a852 commit 16b22de

File tree

8 files changed

+175
-1
lines changed

8 files changed

+175
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

13+
- [#71](https://github.com/green-code-initiative/creedengo-python/pull/71) Add rule GCI96 Require Usecols Argument in Pandas Read Functions
1215
- [#72](https://github.com/green-code-initiative/creedengo-python/pull/72) Add rule GCI97 Optimize square computation (scalar vs vectorized method)
1317
1418
### Changed
1519

src/it/java/org/greencodeinitiative/creedengo/python/integration/tests/GCIRulesIT.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,19 @@ void testGCI203_compliant() {
274274
}
275275

276276
@Test
277+
void testGCI96() {
278+
String filePath = "src/pandasRequireUsecols.py";
279+
String ruleId = "creedengo-python:GCI96";
280+
String ruleMsg = "Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns";
281+
int[] startLines = new int[]{
282+
3, 4, 5, 6, 7, 16, 19
283+
};
284+
int[] endLines = new int[]{
285+
3, 4, 5, 6, 7, 16, 19
286+
};
287+
288+
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_10MIN);
289+
}
277290
void testGCI97(){
278291
String filePath = "src/optimizeSquareComputation.py";
279292
String ruleId = "creedengo-python:GCI97";
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import pandas as pd  # Fixture for rule GCI96; issue line numbers are asserted by the tests, do not insert lines
2+
3+
df1 = pd.read_csv('data.csv') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
4+
df2 = pd.read_parquet('data.parquet') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
5+
df3 = pd.read_excel('data.xlsx') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
6+
df4 = pd.read_json('data.json') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
7+
df5 = pd.read_feather('data.feather') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
8+
9+
df7 = pd.read_csv('data.csv', usecols=['col1', 'col2'])  # Compliant: 'usecols' keyword is present
10+
df8 = pd.read_parquet('data.parquet', columns=['col1', 'col2'])  # Compliant: 'columns' keyword is present
11+
df9 = pd.read_excel('data.xlsx', usecols=[0, 1, 2])  # Compliant: 'usecols' keyword is present (positional indexes)
12+
df10 = pd.read_json('data.json', columns=['col1', 'col2'])  # Compliant: 'columns' keyword is present
13+
df11 = pd.read_feather('data.feather', columns=['col1', 'col2'])  # Compliant: 'columns' keyword is present
14+
15+
import pandas as pandas_alias  # Same module under another alias: the rule matches on the method name only
16+
df14 = pandas_alias.read_csv('data.csv') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
17+
df15 = pandas_alias.read_csv('data.csv', usecols=['col1'])  # Compliant: 'usecols' keyword is present
18+
19+
df16 = pd.read_csv('data.csv', sep=',', header=0) # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
20+
df17 = pd.read_csv('data.csv', sep=',', header=0, usecols=['col1', 'col2'])  # Compliant: 'usecols' given alongside other keywords
21+
22+
cols_to_use = ['col1', 'col2', 'col3']  # Column list held in a variable
23+
df18 = pd.read_parquet('data.parquet', columns=cols_to_use)  # Compliant: only the keyword's presence matters, not its value

src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public class PythonRuleRepository implements RulesDefinition, PythonCustomRuleRe
4141
AvoidListComprehensionInIterations.class,
4242
DetectUnoptimizedImageFormat.class,
4343
AvoidMultipleIfElseStatementCheck.class,
44+
PandasRequireUsecolsArgument.class,
4445
OptimizeSquareComputation.class
4546
);
4647

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs
3+
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org)
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
package org.greencodeinitiative.creedengo.python.checks;
19+
20+
import java.util.Arrays;
21+
import java.util.List;
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
24+
import org.sonar.plugins.python.api.SubscriptionContext;
25+
import org.sonar.plugins.python.api.tree.Argument;
26+
import org.sonar.plugins.python.api.tree.CallExpression;
27+
import org.sonar.plugins.python.api.tree.Tree;
28+
import org.sonar.plugins.python.api.tree.Expression;
29+
import org.sonar.plugins.python.api.tree.QualifiedExpression;
30+
import org.sonar.plugins.python.api.tree.RegularArgument;
31+
import static org.sonar.plugins.python.api.tree.Tree.Kind.*;
32+
33+
@Rule(key = "GCI96")
34+
public class PandasRequireUsecolsArgument extends PythonSubscriptionCheck {
35+
36+
public static final String DESCRIPTION = "Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns";
37+
private static final List<String> READ_METHODS = Arrays.asList(
38+
"read_csv", "read_parquet", "read_excel", "read_feather", "read_json"
39+
);
40+
41+
@Override
42+
public void initialize(Context context) {
43+
context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, this::visitCallExpression);
44+
}
45+
46+
public void visitCallExpression(SubscriptionContext ctx) {
47+
CallExpression callExpression = (CallExpression) ctx.syntaxNode();
48+
Expression callee = callExpression.callee();
49+
50+
if (callee.is(Tree.Kind.QUALIFIED_EXPR)) {
51+
QualifiedExpression qualifiedExpression = (QualifiedExpression) callee;
52+
String methodName = qualifiedExpression.name().name();
53+
54+
if (READ_METHODS.contains(methodName)) {
55+
56+
if (!hasColumnsSpecified(callExpression)) {
57+
ctx.addIssue(callExpression.firstToken(), DESCRIPTION);
58+
}
59+
}
60+
}
61+
}
62+
63+
private boolean hasColumnsSpecified(CallExpression callExpression) {
64+
List<Argument> arguments = callExpression.arguments();
65+
66+
for (Argument arg : arguments) {
67+
if (arg.is(REGULAR_ARGUMENT)) {
68+
RegularArgument regularArg = (RegularArgument) arg;
69+
String paramName = regularArg.keywordArgument() != null ? regularArg.keywordArgument().name() : null;
70+
if (paramName != null && (paramName.equals("usecols") || paramName.equals("columns"))) {
71+
return true;
72+
}
73+
}
74+
}
75+
return false;
76+
}
77+
}

src/main/resources/org/greencodeinitiative/creedengo/python/creedengo_way_profile.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010
"GCI72",
1111
"GCI74",
1212
"GCI89",
14+
"GCI96",
1316
"GCI97",
1418
"GCI203",
1519
"GCI404"
16-
]
20+
]
1721
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs
3+
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org)
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
package org.greencodeinitiative.creedengo.python.checks;
19+
20+
import org.junit.Test;
21+
import org.sonar.python.checks.utils.PythonCheckVerifier;
22+
23+
public class PandasRequireUsecolsArgumentTest {
24+
25+
@Test
26+
public void test() {
27+
PythonCheckVerifier.verify("src/test/resources/checks/pandasRequireUsecolsArgument.py", new PandasRequireUsecolsArgument());
28+
}
29+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import pandas as pd  # Fixture for rule GCI96; expectations are the '# Noncompliant' comments below
2+
3+
df1 = pd.read_csv('data.csv') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
4+
df2 = pd.read_parquet('data.parquet') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
5+
df3 = pd.read_excel('data.xlsx') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
6+
df4 = pd.read_json('data.json') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
7+
df5 = pd.read_feather('data.feather') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
8+
9+
df7 = pd.read_csv('data.csv', usecols=['col1', 'col2'])  # Compliant: 'usecols' keyword is present
10+
df8 = pd.read_parquet('data.parquet', columns=['col1', 'col2'])  # Compliant: 'columns' keyword is present
11+
df9 = pd.read_excel('data.xlsx', usecols=[0, 1, 2])  # Compliant: 'usecols' keyword is present (positional indexes)
12+
df10 = pd.read_json('data.json', columns=['col1', 'col2'])  # Compliant: 'columns' keyword is present
13+
df11 = pd.read_feather('data.feather', columns=['col1', 'col2'])  # Compliant: 'columns' keyword is present
14+
15+
import pandas as pandas_alias  # Same module under another alias: the rule matches on the method name only
16+
df14 = pandas_alias.read_csv('data.csv') # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
17+
df15 = pandas_alias.read_csv('data.csv', usecols=['col1'])  # Compliant: 'usecols' keyword is present
18+
19+
df16 = pd.read_csv('data.csv', sep=',', header=0) # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame using Pandas to load only necessary columns}}
20+
df17 = pd.read_csv('data.csv', sep=',', header=0, usecols=['col1', 'col2'])  # Compliant: 'usecols' given alongside other keywords
21+
22+
cols_to_use = ['col1', 'col2', 'col3']  # Column list held in a variable
23+
df18 = pd.read_parquet('data.parquet', columns=cols_to_use)  # Compliant: only the keyword's presence matters, not its value

0 commit comments

Comments
 (0)