1+ package com .salesforce .sfca .cpdwrapper ;
2+
3+ import net .sourceforge .pmd .cpd .CPDConfiguration ;
4+ import net .sourceforge .pmd .cpd .CpdAnalysis ;
5+ import net .sourceforge .pmd .cpd .Mark ;
6+ import net .sourceforge .pmd .cpd .Match ;
7+ import net .sourceforge .pmd .lang .Language ;
8+ import net .sourceforge .pmd .lang .document .FileLocation ;
9+ import net .sourceforge .pmd .reporting .Report ;
10+ import net .sourceforge .pmd .util .log .PmdReporter ;
11+ import org .slf4j .event .Level ;
12+
13+ import javax .annotation .Nullable ;
14+ import java .io .IOException ;
15+ import java .nio .file .Path ;
16+ import java .nio .file .Paths ;
17+ import java .text .MessageFormat ;
18+ import java .util .ArrayList ;
19+ import java .util .HashMap ;
20+ import java .util .List ;
21+ import java .util .Map ;
22+ import java .util .stream .Collectors ;
23+
24+ /**
25+ * Class to help us invoke CPD - once for each language that should be processed
26+ */
27+ class CpdRunner {
28+ public Map <String , List <CpdMatch >> run (CpdRunInputData runInputData ) throws IOException {
29+ validateRunInputData (runInputData );
30+
31+ Map <String , List <CpdMatch >> results = new HashMap <>();
32+
33+ for (Map .Entry <String , List <String >> entry : runInputData .filesToScanPerLanguage .entrySet ()) {
34+ String language = entry .getKey ();
35+ List <String > filesToScan = entry .getValue ();
36+ if (filesToScan .isEmpty ()) {
37+ continue ;
38+ }
39+ List <Path > pathsToScan = filesToScan .stream ().map (Paths ::get ).collect (Collectors .toList ());
40+ List <CpdMatch > languageMatches = runLanguage (language , pathsToScan , runInputData .minimumTokens , runInputData .skipDuplicateFiles );
41+
42+ if (!languageMatches .isEmpty ()) {
43+ results .put (language , languageMatches );
44+ }
45+ }
46+
47+ return results ;
48+ }
49+
50+ private List <CpdMatch > runLanguage (String language , List <Path > pathsToScan , int minimumTokens , boolean skipDuplicateFiles ) throws IOException {
51+ // Note that the name "minimumTokens" comes from the public facing documentation and the cli but
52+ // behind the scenes, it maps to MinimumTileSize. To learn more about the mappings to the config, see:
53+ // https://github.com/pmd/pmd/blob/main/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
54+ CPDConfiguration config = new CPDConfiguration ();
55+ Language cpdLanguageId = config .getLanguageRegistry ().getLanguageById (language );
56+ if (cpdLanguageId == null ) {
57+ throw new RuntimeException ("The language \" " + language + "\" is not recognized by CPD." );
58+ }
59+ config .setOnlyRecognizeLanguage (cpdLanguageId );
60+ config .setMinimumTileSize (minimumTokens );
61+ config .setInputPathList (pathsToScan );
62+ config .setSkipDuplicates (skipDuplicateFiles );
63+ config .setReporter (new CpdErrorListener ());
64+
65+ List <CpdMatch > cpdMatches = new ArrayList <>();
66+
67+ try (CpdAnalysis cpd = CpdAnalysis .create (config )) {
68+ cpd .performAnalysis (report -> {
69+ for (Report .ProcessingError processingError : report .getProcessingErrors ()) {
70+ // We don't expect any processing errors, but if there are any, then we can push them
71+ // to stdOut so that they ultimately get logged. But we should continue as normal here.
72+ System .out .println ("Unexpected CPD processing error: " + processingError .getError ().getMessage ());
73+ }
74+ for (Match match : report .getMatches ()) {
75+ CpdMatch cpdMatch = new CpdMatch ();
76+ cpdMatch .numBlocks = match .getMarkCount ();
77+ cpdMatch .numTokensInBlock = match .getTokenCount ();
78+ cpdMatch .numNonemptyLinesInBlock = match .getLineCount ();
79+
80+ for (Mark mark : match .getMarkSet ()) {
81+ CpdMatch .BlockLocation blockLocation = new CpdMatch .BlockLocation ();
82+ FileLocation location = mark .getLocation ();
83+ blockLocation .file = location .getFileId ().getAbsolutePath ();
84+ blockLocation .startLine = location .getStartLine ();
85+ blockLocation .startCol = location .getStartColumn ();
86+ blockLocation .endLine = location .getEndLine ();
87+ blockLocation .endCol = location .getEndColumn ();
88+
89+ cpdMatch .blockLocations .add (blockLocation );
90+ }
91+
92+ cpdMatches .add (cpdMatch );
93+ }
94+ });
95+ }
96+
97+ return cpdMatches ;
98+ }
99+
100+ private void validateRunInputData (CpdRunInputData runInputData ) {
101+ if (runInputData .filesToScanPerLanguage == null ) {
102+ throw new RuntimeException ("The \" filesToScanPerLanguage\" field was not set." );
103+ } else if (runInputData .filesToScanPerLanguage .isEmpty ()) {
104+ throw new RuntimeException (("The \" filesToScanPerLanguage\" field was found to be empty." ));
105+ } else if (runInputData .minimumTokens <= 0 ) {
106+ throw new RuntimeException ("The \" minimumTokens\" field was not set to a positive number." );
107+ }
108+ }
109+ }
110+
111+ // This class simply helps us process any errors that may be thrown by CPD. By default, CPD suppresses errors so that
112+ // they are not thrown. So here, we look out for the errors that we care about and process it to throw a better
113+ // error messages. We override the logEx method in particular because all other error methods call through to logEx.
114+ class CpdErrorListener implements PmdReporter {
115+ @ Override
116+ public void logEx (Level level , @ javax .annotation .Nullable String s , Object [] objects , @ Nullable Throwable throwable ) {
117+ if (throwable != null ) {
118+ throw new RuntimeException ("CPD threw an unexpected exception:\n " + throwable .getMessage (), throwable );
119+ } else if (s != null ) {
120+ String message = MessageFormat .format (s , objects );
121+ throw new RuntimeException ("CPD threw an unexpected exception:\n " + message );
122+ }
123+ }
124+
125+ // These methods aren't needed or used, but they are required to be implemented (since the interface does not give them default implementations)
126+ @ Override
127+ public boolean isLoggable (Level level ) {
128+ return false ;
129+ }
130+ @ Override
131+ public int numErrors () {
132+ return 0 ;
133+ }
134+ }
0 commit comments