Skip to content

Commit 28c73a7

Browse files
NEW(cpd): @W-16866826@: Implement java side CpdWrapper (#115)
1 parent 5b27f91 commit 28c73a7

File tree

14 files changed

+751
-39
lines changed

14 files changed

+751
-39
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"rimraf": "*"
2727
},
2828
"jest": {
29+
"testTimeout": 60000,
2930
"coverageThreshold": {
3031
"global": {
3132
"branches": 80,

packages/code-analyzer-pmd-engine/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
"showcoverage": "npm run showcoverage-java && npm run showcoverage-typescript"
5959
},
6060
"jest": {
61+
"testTimeout": 60000,
6162
"preset": "ts-jest",
6263
"testEnvironment": "node",
6364
"testMatch": [
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.salesforce.sfca.cpdwrapper;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
6+
/**
 * Plain data holder for a single CPD match, shaped so that it can be serialized to json as-is.
 * The structure that ultimately gets serialized is Map<String, List<CpdMatch>>, i.e. the list of
 * matches found for each language.
 *
 * Fields are intentionally public and mutable: instances are filled in field-by-field by the
 * code that runs CPD and are then handed to the json serializer.
 */
public class CpdMatch {
    /** Token count of the duplicated block. */
    public int numTokensInBlock;

    /** Line count of the duplicated block. */
    public int numNonemptyLinesInBlock;

    /** Number of places where the duplicated block occurs. */
    public int numBlocks;

    /** One entry per occurrence of the duplicated block; starts out empty and is never null by default. */
    public List<BlockLocation> blockLocations = new ArrayList<>();

    /**
     * Position of one occurrence of a duplicated block inside a file.
     */
    public static class BlockLocation {
        /** Path of the file that contains this occurrence. */
        public String file;

        /** Line on which the occurrence starts. */
        public int startLine;

        /** Column on which the occurrence starts. */
        public int startCol;

        /** Line on which the occurrence ends. */
        public int endLine;

        /** Column on which the occurrence ends. */
        public int endCol;
    }
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.salesforce.sfca.cpdwrapper;
2+
3+
import java.util.List;
4+
import java.util.Map;
5+
6+
/**
 * Input arguments for the CpdRunner. The input json file is deserialized directly into this
 * structure, so the field names must match the keys used in that file.
 */
class CpdRunInputData {
    /** Files to scan, keyed by language; null when the key is missing from the input json. */
    public Map<String, List<String>> filesToScanPerLanguage;

    /** Value of the "minimumTokens" setting; 0 when the key is missing from the input json. */
    public int minimumTokens;

    /** Value of the "skipDuplicateFiles" setting; false when the key is missing from the input json. */
    public boolean skipDuplicateFiles;
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package com.salesforce.sfca.cpdwrapper;
2+
3+
import net.sourceforge.pmd.cpd.CPDConfiguration;
4+
import net.sourceforge.pmd.cpd.CpdAnalysis;
5+
import net.sourceforge.pmd.cpd.Mark;
6+
import net.sourceforge.pmd.cpd.Match;
7+
import net.sourceforge.pmd.lang.Language;
8+
import net.sourceforge.pmd.lang.document.FileLocation;
9+
import net.sourceforge.pmd.reporting.Report;
10+
import net.sourceforge.pmd.util.log.PmdReporter;
11+
import org.slf4j.event.Level;
12+
13+
import javax.annotation.Nullable;
14+
import java.io.IOException;
15+
import java.nio.file.Path;
16+
import java.nio.file.Paths;
17+
import java.text.MessageFormat;
18+
import java.util.ArrayList;
19+
import java.util.HashMap;
20+
import java.util.List;
21+
import java.util.Map;
22+
import java.util.stream.Collectors;
23+
24+
/**
25+
* Class to help us invoke CPD - once for each language that should be processed
26+
*/
27+
class CpdRunner {
28+
public Map<String, List<CpdMatch>> run(CpdRunInputData runInputData) throws IOException {
29+
validateRunInputData(runInputData);
30+
31+
Map<String, List<CpdMatch>> results = new HashMap<>();
32+
33+
for (Map.Entry<String, List<String>> entry : runInputData.filesToScanPerLanguage.entrySet()) {
34+
String language = entry.getKey();
35+
List<String> filesToScan = entry.getValue();
36+
if (filesToScan.isEmpty()) {
37+
continue;
38+
}
39+
List<Path> pathsToScan = filesToScan.stream().map(Paths::get).collect(Collectors.toList());
40+
List<CpdMatch> languageMatches = runLanguage(language, pathsToScan, runInputData.minimumTokens, runInputData.skipDuplicateFiles);
41+
42+
if (!languageMatches.isEmpty()) {
43+
results.put(language, languageMatches);
44+
}
45+
}
46+
47+
return results;
48+
}
49+
50+
private List<CpdMatch> runLanguage(String language, List<Path> pathsToScan, int minimumTokens, boolean skipDuplicateFiles) throws IOException {
51+
// Note that the name "minimumTokens" comes from the public facing documentation and the cli but
52+
// behind the scenes, it maps to MinimumTileSize. To learn more about the mappings to the config, see:
53+
// https://github.com/pmd/pmd/blob/main/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
54+
CPDConfiguration config = new CPDConfiguration();
55+
Language cpdLanguageId = config.getLanguageRegistry().getLanguageById(language);
56+
if (cpdLanguageId == null) {
57+
throw new RuntimeException("The language \"" + language + "\" is not recognized by CPD.");
58+
}
59+
config.setOnlyRecognizeLanguage(cpdLanguageId);
60+
config.setMinimumTileSize(minimumTokens);
61+
config.setInputPathList(pathsToScan);
62+
config.setSkipDuplicates(skipDuplicateFiles);
63+
config.setReporter(new CpdErrorListener());
64+
65+
List<CpdMatch> cpdMatches = new ArrayList<>();
66+
67+
try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
68+
cpd.performAnalysis(report -> {
69+
for (Report.ProcessingError processingError : report.getProcessingErrors()) {
70+
// We don't expect any processing errors, but if there are any, then we can push them
71+
// to stdOut so that they ultimately get logged. But we should continue as normal here.
72+
System.out.println("Unexpected CPD processing error: " + processingError.getError().getMessage());
73+
}
74+
for (Match match : report.getMatches()) {
75+
CpdMatch cpdMatch = new CpdMatch();
76+
cpdMatch.numBlocks = match.getMarkCount();
77+
cpdMatch.numTokensInBlock = match.getTokenCount();
78+
cpdMatch.numNonemptyLinesInBlock = match.getLineCount();
79+
80+
for (Mark mark : match.getMarkSet()) {
81+
CpdMatch.BlockLocation blockLocation = new CpdMatch.BlockLocation();
82+
FileLocation location = mark.getLocation();
83+
blockLocation.file = location.getFileId().getAbsolutePath();
84+
blockLocation.startLine = location.getStartLine();
85+
blockLocation.startCol = location.getStartColumn();
86+
blockLocation.endLine = location.getEndLine();
87+
blockLocation.endCol = location.getEndColumn();
88+
89+
cpdMatch.blockLocations.add(blockLocation);
90+
}
91+
92+
cpdMatches.add(cpdMatch);
93+
}
94+
});
95+
}
96+
97+
return cpdMatches;
98+
}
99+
100+
private void validateRunInputData(CpdRunInputData runInputData) {
101+
if (runInputData.filesToScanPerLanguage == null) {
102+
throw new RuntimeException("The \"filesToScanPerLanguage\" field was not set.");
103+
} else if (runInputData.filesToScanPerLanguage.isEmpty()) {
104+
throw new RuntimeException(("The \"filesToScanPerLanguage\" field was found to be empty."));
105+
} else if (runInputData.minimumTokens <= 0) {
106+
throw new RuntimeException("The \"minimumTokens\" field was not set to a positive number.");
107+
}
108+
}
109+
}
110+
111+
// This class simply helps us process any errors that may be thrown by CPD. By default, CPD suppresses errors so that
112+
// they are not thrown. So here, we look out for the errors that we care about and process it to throw a better
113+
// error messages. We override the logEx method in particular because all other error methods call through to logEx.
114+
class CpdErrorListener implements PmdReporter {
115+
@Override
116+
public void logEx(Level level, @javax.annotation.Nullable String s, Object[] objects, @Nullable Throwable throwable) {
117+
if (throwable != null) {
118+
throw new RuntimeException("CPD threw an unexpected exception:\n" + throwable.getMessage(), throwable);
119+
} else if (s != null) {
120+
String message = MessageFormat.format(s, objects);
121+
throw new RuntimeException("CPD threw an unexpected exception:\n" + message);
122+
}
123+
}
124+
125+
// These methods aren't needed or used, but they are required to be implemented (since the interface does not give them default implementations)
126+
@Override
127+
public boolean isLoggable(Level level) {
128+
return false;
129+
}
130+
@Override
131+
public int numErrors() {
132+
return 0;
133+
}
134+
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package com.salesforce.sfca.cpdwrapper;
2+
3+
import java.io.FileReader;
4+
import java.io.FileWriter;
5+
import java.io.IOException;
6+
import java.util.Arrays;
7+
import java.util.List;
8+
import java.util.Map;
9+
10+
import com.google.gson.Gson;
11+
12+
/**
13+
* Provides following commands:
14+
* RUN:
15+
* - Runs the rules provided by the input ruleset file on a set of files and writes results to a JSON file
16+
* - Invocation: java -cp {classPath} com.salesforce.sfca.cpdwrapper.CpdWrapper run {argsInputFile} {resultsOutputFile}
17+
* - {classPath} is the list of entries to add to the class path
18+
* - {argsInputFile} is a JSON file containing the input arguments for the run command.
19+
* Example:
20+
* {
21+
* "filesToScanPerLanguage": {
22+
* "apex": ["/full/path/to/apex_file1.cls", "/full/path/to/apex_file2.trigger", ...],
23+
* ...,
24+
* "xml": ["full/path/to/xml_file1.xml", "/full/path/to/xml_file2.xml", ...]
25+
* },
26+
* "minimumTokens": 100,
27+
* "skipDuplicateFiles": false
28+
* }
29+
* - {resultsOutputFile} is a JSON file to write CPD results to.
30+
* Example:
31+
* {
32+
* "apex": [
33+
* {
34+
* "numTokensInBlock": 18,
35+
* "numNonemptyLinesInBlock": 5,
36+
* "numBlocks": 2,
37+
* "blockLocations": [
38+
* {
39+
* "file": "/full/path/to/file1.cls",
40+
* "startLine": 1, "startCol": 1, "endLine": 5, "endCol": 2
41+
* },
42+
* {
43+
* "file": "/full/path/to/file2.cls",
44+
* "startLine": 18, "startCol": 6, "endLine": 22, "endCol": 8
45+
* }
46+
* ]
47+
* },
48+
* ...
49+
* ],
50+
* "xml": ...
51+
* }
52+
*/
53+
public class CpdWrapper {
54+
public static void main(String[] args) {
55+
long startTime = System.currentTimeMillis();
56+
System.out.println("START OF CALL TO \"CpdWrapper\" WITH ARGUMENTS: " + String.join(" ", args));
57+
58+
if (args.length == 0) {
59+
throw new RuntimeException("Missing arguments to CpdWrapper.");
60+
} else if(args[0].equalsIgnoreCase("run")) {
61+
invokeRunCommand(Arrays.copyOfRange(args, 1, args.length));
62+
} else {
63+
throw new RuntimeException("Bad first argument to CpdWrapper. Expected \"run\". Received: \"" + args[0] + "\"");
64+
}
65+
66+
long endTime = System.currentTimeMillis();
67+
System.out.println("END OF CALL TO \"CpdWrapper\": " + (endTime - startTime) + " milliseconds");
68+
}
69+
70+
private static void invokeRunCommand(String[] args) {
71+
if (args.length != 2) {
72+
throw new RuntimeException("Invalid number of arguments following the \"run\" command. Expected 2 but received: " + args.length);
73+
}
74+
String argsInputFile = args[0];
75+
String resultsOutputFile = args[1];
76+
77+
Gson gson = new Gson();
78+
79+
CpdRunInputData inputData;
80+
try (FileReader reader = new FileReader(argsInputFile)) {
81+
inputData = gson.fromJson(reader, CpdRunInputData.class);
82+
} catch (Exception e) {
83+
throw new RuntimeException("Could not read contents from \"" + argsInputFile + "\"", e);
84+
}
85+
86+
CpdRunner cpdRunner = new CpdRunner();
87+
Map<String, List<CpdMatch>> results;
88+
try {
89+
results = cpdRunner.run(inputData);
90+
} catch (Exception e) {
91+
throw new RuntimeException("Error while attempting to invoke CpdRunner.run: " + e.getMessage(), e);
92+
}
93+
94+
try (FileWriter fileWriter = new FileWriter(resultsOutputFile)) {
95+
gson.toJson(results, fileWriter);
96+
} catch (IOException e) {
97+
throw new RuntimeException(e);
98+
}
99+
}
100+
}

packages/code-analyzer-pmd-engine/pmd-wrapper/src/main/java/com/salesforce/sfca/pmdwrapper/PmdRuleDescriber.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.slf4j.event.Level;
1010

1111
import javax.annotation.Nullable;
12+
import java.text.MessageFormat;
1213
import java.util.*;
1314
import java.util.regex.Pattern;
1415
import java.util.regex.Matcher;
@@ -152,9 +153,9 @@ private static String getLimitedDescription(Rule rule) {
152153
}
153154
}
154155

155-
// This class simply helps us process any errors that may be thrown by PMD. By default PMD suppresses errors so that
156+
// This class simply helps us process any errors that may be thrown by PMD. By default, PMD suppresses errors so that
156157
// they are not thrown. So here, we look out for the errors that we care about and process it to throw a better
157-
// error messages.
158+
// error messages. We override the logEx method in particular because all other error methods call through to logEx.
158159
class PmdErrorListener implements PmdReporter {
159160
@Override
160161
public void logEx(Level level, @Nullable String s, Object[] objects, @Nullable Throwable throwable) {
@@ -174,6 +175,9 @@ public void logEx(Level level, @Nullable String s, Object[] objects, @Nullable T
174175
}
175176
}
176177
throw new RuntimeException("PMD threw an unexpected exception:\n" + message, throwable);
178+
} else if (s != null) {
179+
String message = MessageFormat.format(s, objects);
180+
throw new RuntimeException("PMD threw an unexpected exception:\n" + message);
177181
}
178182
}
179183

packages/code-analyzer-pmd-engine/pmd-wrapper/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
* Provides following commands:
1717
* DESCRIBE:
1818
* - Describes the available PMD rules by writing a list of PmdRuleInfo objects to a JSON file
19-
* - Invocation: java -cp {classPath} describe {outputFile} {languages}
19+
* - Invocation: java -cp {classPath} com.salesforce.sfca.pmdwrapper.PmdWrapper describe {outputFile} {languages}
2020
* - {classPath} is the list of entries to add to the class path
2121
* - {outputFile} is a file to write the array of PmdRuleInfo objects to in JSON format
2222
* - {languages} is a comma separated list of languages associated with the rules to describe
2323
* RUN:
2424
* - Runs the rules provided by the input ruleset file on a set of files and writes results to a JSON file
25-
* - Invocation: java -cp {classPath} run {ruleSetInputFile} {filesToScanInputFile} {resultsOutputFile}
25+
* - Invocation: java -cp {classPath} com.salesforce.sfca.pmdwrapper.PmdWrapper run {ruleSetInputFile} {filesToScanInputFile} {resultsOutputFile}
2626
* - {classPath} is the list of entries to add to the class path
2727
* - {ruleSetInputFile} is a PMD ruleset file that contains the rules to run
2828
* - {filesToScanInputFile} is a file containing a newline separated list of files to scan

0 commit comments

Comments
 (0)