Skip to content

Commit bd8ecff

Browse files
GCI99 AvoidCSVFormat: simplify the regex pattern from \.(csv) to \.csv (removing the unnecessary capture group) and add an integration test (IT).
Co-authored-by: DataLabGroupe-CreditAgricole <[email protected]>
1 parent 3052cb1 commit bd8ecff

File tree

4 files changed

+78
-15
lines changed

4 files changed

+78
-15
lines changed

src/it/java/org/greencodeinitiative/creedengo/python/integration/tests/GCIRulesIT.java

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,15 +17,15 @@
1717
*/
1818
package org.greencodeinitiative.creedengo.python.integration.tests;
1919

20-
import org.junit.jupiter.api.Test;
21-
import org.sonarqube.ws.Issues;
22-
import org.sonarqube.ws.Measures;
20+
import static java.util.Optional.ofNullable;
21+
import static org.assertj.core.api.Assertions.assertThat;
2322

2423
import java.util.List;
2524
import java.util.Map;
2625

27-
import static java.util.Optional.ofNullable;
28-
import static org.assertj.core.api.Assertions.assertThat;
26+
import org.junit.jupiter.api.Test;
27+
import org.sonarqube.ws.Issues;
28+
import org.sonarqube.ws.Measures;
2929

3030
class GCIRulesIT extends GCIRulesBase {
3131

@@ -303,6 +303,21 @@ void testGCI97(){
303303
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_1MIN);
304304
}
305305

306+
@Test
307+
void testGCI99(){
308+
String filePath = "src/avoidCSVFormat.py";
309+
String ruleId = "creedengo-python:GCI99";
310+
String ruleMsg = "Use Parquet or Feather format instead of CSV";
311+
int[] startLines = new int[]{
312+
4, 6, 10, 12, 14, 15, 17, 18, 23, 39, 47, 48
313+
};
314+
int[] endLines = new int[]{
315+
4, 6, 10, 12, 14, 15, 17, 18, 23, 39, 47, 48
316+
};
317+
318+
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_50MIN);
319+
}
320+
306321
@Test
307322
void testGCI106() {
308323
String filePath = "src/avoidSqrtInLoop.py";
@@ -317,4 +332,6 @@ void testGCI106() {
317332
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_5MIN);
318333
}
319334

335+
336+
320337
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import pandas as pd
2+
import pandas as pandas_alias
3+
4+
df = pd.read_csv('data.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
5+
6+
df.to_csv('output.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
7+
8+
df = pd.read_parquet('data.parquet')
9+
10+
path_to_file = 'MNIST.csv' # Noncompliant {{Use Parquet or Feather format instead of CSV}}
11+
12+
df2 = pandas_alias.read_csv('another_data.csv') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
13+
14+
with open('data.csv') as f: # Noncompliant {{Use Parquet or Feather format instead of CSV}}
15+
df3 = pd.read_csv(f) # Noncompliant {{Use Parquet or Feather format instead of CSV}}
16+
17+
df4 = pd.read_csv( # Noncompliant {{Use Parquet or Feather format instead of CSV}}
18+
'complex_data.csv', # Noncompliant {{Use Parquet or Feather format instead of CSV}}
19+
sep=',',
20+
header=0
21+
)
22+
23+
df4.to_csv("output.csv") # Noncompliant {{Use Parquet or Feather format instead of CSV}}
24+
25+
df5 = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
26+
27+
other_path = 'data.json'
28+
29+
df6 = pd.read_json(other_path)
30+
31+
df7 = pd.read_feather('features.feather')
32+
33+
df8 = pd.read_parquet("file.parquet")
34+
35+
df9 = pandas_alias.read_feather("something.feather")
36+
37+
df10 = pandas_alias.read_parquet("nested/dir/file.parquet")
38+
39+
result = pd.read_csv("log.csv", encoding='utf-8') # Noncompliant {{Use Parquet or Feather format instead of CSV}}
40+
41+
df11 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
42+
df11.to_parquet("output.parquet")
43+
44+
df12 = pd.DataFrame({'x': [5, 6]})
45+
df12.to_feather("output.feather")
46+
47+
filename = "report.csv" # Noncompliant {{Use Parquet or Feather format instead of CSV}}
48+
data = pd.read_csv(filename) # Noncompliant {{Use Parquet or Feather format instead of CSV}}
49+
50+
log_file = "logfile.log"
51+
df13 = pd.read_table(log_file, delimiter='|')

src/main/java/org/greencodeinitiative/creedengo/python/checks/AvoidCSVFormat.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,12 @@
3131
import java.util.regex.Pattern;
3232

3333
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
34-
@Rule(key = "GCI99")
3534

35+
@Rule(key = "GCI99")
3636
public class AvoidCSVFormat extends PythonSubscriptionCheck {
3737

3838
public static final String DESCRIPTION = "Use Parquet or Feather format instead of CSV";
39-
protected static final Pattern CSV_EXTENSION = Pattern.compile("\\.(csv)");
40-
39+
protected static final Pattern CSV_EXTENSION = Pattern.compile("\\.csv");
4140
private final Set<Integer> reportedLines = new HashSet<>();
4241

4342
@Override
@@ -48,18 +47,14 @@ public void initialize(Context context) {
4847

4948
public void visitCallExpression(SubscriptionContext ctx) {
5049
CallExpression callExpression = (CallExpression) ctx.syntaxNode();
51-
5250
Expression callee = callExpression.callee();
5351

54-
55-
5652
if (callee.is(Tree.Kind.QUALIFIED_EXPR)) {
5753
QualifiedExpression qualifiedExpression = (QualifiedExpression) callee;
5854
String methodName = qualifiedExpression.name().name();
59-
55+
6056
if (methodName.equals("read_csv") || methodName.equals("to_csv")) {
6157
int line = callExpression.firstToken().line();
62-
6358

6459
if (!reportedLines.contains(line)) {
6560
reportedLines.add(line);
@@ -73,13 +68,12 @@ public void visitNodeString(SubscriptionContext ctx) {
7368
StringLiteral stringLiteral = (StringLiteral) ctx.syntaxNode();
7469
int line = stringLiteral.firstToken().line();
7570

76-
7771
if (reportedLines.contains(line)) {
7872
return;
7973
}
80-
8174
String strValue = stringLiteral.trimmedQuotesValue();
8275
Matcher matcher = CSV_EXTENSION.matcher(strValue);
76+
8377
if (matcher.find()) {
8478
reportedLines.add(line);
8579
ctx.addIssue(stringLiteral, DESCRIPTION);

src/main/resources/org/greencodeinitiative/creedengo/python/creedengo_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"GCI89",
1313
"GCI96",
1414
"GCI97",
15+
"GCI99",
1516
"GCI106",
1617
"GCI203",
1718
"GCI404"

0 commit comments

Comments
 (0)