Skip to content

Commit 52d81ac

Browse files
authored
Merge pull request #69 from cleophass/GCI95-python
GCI99 avoidCSVFormat #Python #DLG #Build
2 parents 238f78f + 2605edb commit 52d81ac

File tree

8 files changed

+239
-6
lines changed

8 files changed

+239
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- [#69](https://github.com/green-code-initiative/creedengo-python/pull/69) Add rule GCI99 Avoid CSV Format
1213
- [#76](https://github.com/green-code-initiative/creedengo-python/pull/76) Add rule GCI103 Dictionary Items Unused. A rule specifying that dictionary iteration should consider the pertinence of the element used.
1314
- [#79](https://github.com/green-code-initiative/creedengo-python/pull/79) Add rule GCI106 Avoid SQRT in a loop
1415
- [#71](https://github.com/green-code-initiative/creedengo-python/pull/71) Add rule GCI96 Require Usecols Argument in Pandas Read Functions

src/it/java/org/greencodeinitiative/creedengo/python/integration/tests/GCIRulesIT.java

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@
1717
*/
1818
package org.greencodeinitiative.creedengo.python.integration.tests;
1919

20-
import org.junit.jupiter.api.Test;
21-
import org.sonarqube.ws.Issues;
22-
import org.sonarqube.ws.Measures;
20+
import static java.util.Optional.ofNullable;
21+
import static org.assertj.core.api.Assertions.assertThat;
2322

2423
import java.util.List;
2524
import java.util.Map;
2625

27-
import static java.util.Optional.ofNullable;
28-
import static org.assertj.core.api.Assertions.assertThat;
26+
import org.junit.jupiter.api.Test;
27+
import org.sonarqube.ws.Issues;
28+
import org.sonarqube.ws.Measures;
2929

3030
class GCIRulesIT extends GCIRulesBase {
3131

@@ -320,6 +320,21 @@ void testGCI103(){
320320

321321
}
322322

323+
@Test
324+
void testGCI99(){
325+
String filePath = "src/avoidCSVFormat.py";
326+
String ruleId = "creedengo-python:GCI99";
327+
String ruleMsg = "Use Parquet or Feather format instead of CSV";
328+
int[] startLines = new int[]{
329+
4, 6, 10, 12, 14, 15, 17, 18, 23, 39, 47, 48
330+
};
331+
int[] endLines = new int[]{
332+
4, 6, 10, 12, 14, 15, 17, 18, 23, 39, 47, 48
333+
};
334+
335+
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_50MIN);
336+
}
337+
323338
@Test
324339
void testGCI106() {
325340
String filePath = "src/avoidSqrtInLoop.py";
@@ -334,4 +349,6 @@ void testGCI106() {
334349
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_5MIN);
335350
}
336351

352+
353+
337354
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import pandas as pd
2+
import pandas as pandas_alias
3+
4+
df = pd.read_csv('data.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
5+
6+
df.to_csv('output.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
7+
8+
df = pd.read_parquet('data.parquet')
9+
10+
path_to_file = 'MNIST.csv' # Noncompliant {{Use Parquet or Feather format instead of CSV}}
11+
12+
df2 = pandas_alias.read_csv('another_data.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
13+
14+
with open('data.csv') as f: # Noncompliant {{Use Parquet or Feather format instead of CSV}}
15+
df3 = pd.read_csv(f) # Noncompliant {{Use Parquet or Feather format instead of CSV}}
16+
17+
df4 = pd.read_csv( # Noncompliant {{Use Parquet or Feather format instead of CSV}}
18+
'complex_data.csv', # Noncompliant {{Use Parquet or Feather format instead of CSV}}
19+
sep=',',
20+
header=0
21+
)
22+
23+
df4.to_csv("output.csv") # Noncompliant {{Use Parquet or Feather format instead of CSV}}
24+
25+
df5 = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
26+
27+
other_path = 'data.json'
28+
29+
df6 = pd.read_json(other_path)
30+
31+
df7 = pd.read_feather('features.feather')
32+
33+
df8 = pd.read_parquet("file.parquet")
34+
35+
df9 = pandas_alias.read_feather("something.feather")
36+
37+
df10 = pandas_alias.read_parquet("nested/dir/file.parquet")
38+
39+
result = pd.read_csv("log.csv", encoding='utf-8') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
40+
41+
df11 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
42+
df11.to_parquet("output.parquet")
43+
44+
df12 = pd.DataFrame({'x': [5, 6]})
45+
df12.to_feather("output.feather")
46+
47+
filename = "report.csv" # Noncompliant {{Use Parquet or Feather format instead of CSV}}
48+
data = pd.read_csv(filename) # Noncompliant {{Use Parquet or Feather format instead of CSV}}
49+
50+
log_file = "logfile.log"
51+
df13 = pd.read_table(log_file, delimiter='|')

src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ public class PythonRuleRepository implements RulesDefinition, PythonCustomRuleRe
4444
PandasRequireUsecolsArgument.class,
4545
OptimizeSquareComputation.class,
4646
AvoidSqrtInLoop.class,
47-
DictionaryItemsUnused.class
47+
DictionaryItemsUnused.class,
48+
AvoidCSVFormat.class
4849
);
4950

5051
public static final String LANGUAGE = "py";
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs
3+
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org)
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
package org.greencodeinitiative.creedengo.python.checks;
19+
20+
import org.sonar.plugins.python.api.SubscriptionContext;
21+
import org.sonar.plugins.python.api.tree.CallExpression;
22+
import org.sonar.plugins.python.api.tree.QualifiedExpression;
23+
import org.sonar.plugins.python.api.tree.StringLiteral;
24+
import org.sonar.plugins.python.api.tree.Tree;
25+
import org.sonar.plugins.python.api.tree.Expression;
26+
import org.sonar.check.Rule;
27+
28+
import java.util.HashSet;
29+
import java.util.Set;
30+
import java.util.regex.Matcher;
31+
import java.util.regex.Pattern;
32+
33+
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
34+
35+
@Rule(key = "GCI99")
36+
public class AvoidCSVFormat extends PythonSubscriptionCheck {
37+
38+
public static final String DESCRIPTION = "Use Parquet or Feather format instead of CSV";
39+
protected static final Pattern CSV_EXTENSION = Pattern.compile("\\.csv");
40+
private final Set<Integer> reportedLines = new HashSet<>();
41+
42+
@Override
43+
public void initialize(Context context) {
44+
context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, this::visitCallExpression);
45+
context.registerSyntaxNodeConsumer(Tree.Kind.STRING_LITERAL, this::visitNodeString);
46+
}
47+
48+
public void visitCallExpression(SubscriptionContext ctx) {
49+
CallExpression callExpression = (CallExpression) ctx.syntaxNode();
50+
Expression callee = callExpression.callee();
51+
52+
if (callee.is(Tree.Kind.QUALIFIED_EXPR)) {
53+
QualifiedExpression qualifiedExpression = (QualifiedExpression) callee;
54+
String methodName = qualifiedExpression.name().name();
55+
56+
if (methodName.equals("read_csv") || methodName.equals("to_csv")) {
57+
int line = callExpression.firstToken().line();
58+
59+
if (!reportedLines.contains(line)) {
60+
reportedLines.add(line);
61+
ctx.addIssue(callExpression.firstToken(), DESCRIPTION);
62+
}
63+
}
64+
}
65+
}
66+
67+
public void visitNodeString(SubscriptionContext ctx) {
68+
StringLiteral stringLiteral = (StringLiteral) ctx.syntaxNode();
69+
int line = stringLiteral.firstToken().line();
70+
71+
if (reportedLines.contains(line)) {
72+
return;
73+
}
74+
String strValue = stringLiteral.trimmedQuotesValue();
75+
Matcher matcher = CSV_EXTENSION.matcher(strValue);
76+
77+
if (matcher.find()) {
78+
reportedLines.add(line);
79+
ctx.addIssue(stringLiteral, DESCRIPTION);
80+
}
81+
}
82+
}

src/main/resources/org/greencodeinitiative/creedengo/python/creedengo_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"GCI89",
1313
"GCI96",
1414
"GCI97",
15+
"GCI99",
1516
"GCI103",
1617
"GCI106",
1718
"GCI203",
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs
3+
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org)
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
package org.greencodeinitiative.creedengo.python.checks;
19+
20+
import org.junit.Test;
21+
import org.sonar.python.checks.utils.PythonCheckVerifier;
22+
23+
public class AvoidCSVFormatTest {
24+
25+
@Test
26+
public void test() {
27+
PythonCheckVerifier.verify("src/test/resources/checks/avoidCSVFormat.py", new AvoidCSVFormat());
28+
}
29+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import pandas as pd
2+
import pandas as pandas_alias
3+
4+
df = pd.read_csv('data.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
5+
6+
df.to_csv('output.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
7+
8+
df = pd.read_parquet('data.parquet')
9+
10+
path_to_file = 'MNIST.csv' # Noncompliant {{Use Parquet or Feather format instead of CSV}}
11+
12+
df2 = pandas_alias.read_csv('another_data.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
13+
14+
with open('data.csv') as f: # Noncompliant {{Use Parquet or Feather format instead of CSV}}
15+
df3 = pd.read_csv(f) # Noncompliant {{Use Parquet or Feather format instead of CSV}}
16+
17+
df4 = pd.read_csv( # Noncompliant {{Use Parquet or Feather format instead of CSV}}
18+
'complex_data.csv', # Noncompliant {{Use Parquet or Feather format instead of CSV}}
19+
sep=',',
20+
header=0
21+
)
22+
23+
df4.to_csv("output.csv") # Noncompliant {{Use Parquet or Feather format instead of CSV}}
24+
25+
df5 = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
26+
27+
other_path = 'data.json'
28+
29+
df6 = pd.read_json(other_path)
30+
31+
df7 = pd.read_feather('features.feather')
32+
33+
df8 = pd.read_parquet("file.parquet")
34+
35+
df9 = pandas_alias.read_feather("something.feather")
36+
37+
df10 = pandas_alias.read_parquet("nested/dir/file.parquet")
38+
39+
result = pd.read_csv("log.csv", encoding='utf-8') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
40+
41+
df11 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
42+
df11.to_parquet("output.parquet")
43+
44+
df12 = pd.DataFrame({'x': [5, 6]})
45+
df12.to_feather("output.feather")
46+
47+
filename = "report.csv" # Noncompliant {{Use Parquet or Feather format instead of CSV}}
48+
data = pd.read_csv(filename) # Noncompliant {{Use Parquet or Feather format instead of CSV}}
49+
50+
log_file = "logfile.log"
51+
df13 = pd.read_table(log_file, delimiter='|')

0 commit comments

Comments
 (0)