From 1f3f21b255760d358bc22e880952eeb402e0c567 Mon Sep 17 00:00:00 2001 From: Stephen Carter Date: Fri, 6 Dec 2024 17:47:23 -0500 Subject: [PATCH] CHANGE(cpd): @W-17386401@: Update cpd engine's minimum_tokens field to be an object with values per language --- .../sfca/cpdwrapper/CpdRunInputData.java | 8 +- .../salesforce/sfca/cpdwrapper/CpdRunner.java | 42 ++++--- .../sfca/cpdwrapper/CpdWrapper.java | 13 +- .../sfca/cpdwrapper/CpdWrapperTest.java | 119 +++++++++++++----- .../code-analyzer-pmd-engine/src/config.ts | 53 ++++++-- .../src/cpd-engine.ts | 26 ++-- .../src/cpd-wrapper.ts | 8 +- .../code-analyzer-pmd-engine/src/messages.ts | 13 +- .../test/cpd-engine.test.ts | 4 +- .../test/plugin.test.ts | 45 +++++-- 10 files changed, 233 insertions(+), 98 deletions(-) diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunInputData.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunInputData.java index 13fdd441..a9c2fa79 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunInputData.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunInputData.java @@ -7,7 +7,11 @@ * Data structure for the CpdRunner that we can deserialize the input json file into */ class CpdRunInputData { - public Map> filesToScanPerLanguage; - public int minimumTokens; + public Map runDataPerLanguage; public boolean skipDuplicateFiles; } + +class LanguageSpecificRunData { + public List filesToScan; + public int minimumTokens; +} \ No newline at end of file diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunner.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunner.java index e86bed53..9648726c 100644 --- 
a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunner.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdRunner.java @@ -18,10 +18,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; /** @@ -33,18 +30,15 @@ class CpdRunner { public Map run(CpdRunInputData runInputData) throws IOException { validateRunInputData(runInputData); - List languagesToProcess = runInputData.filesToScanPerLanguage.entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) // Keep only non-empty lists - .map(Map.Entry::getKey) - .collect(Collectors.toList()); - + List languagesToProcess = new ArrayList<>(runInputData.runDataPerLanguage.keySet()); progressReporter.initialize(languagesToProcess); Map results = new HashMap<>(); for (String language : languagesToProcess) { - List filesToScan = runInputData.filesToScanPerLanguage.get(language); - List pathsToScan = filesToScan.stream().map(Paths::get).collect(Collectors.toList()); - CpdLanguageRunResults languageRunResults = runLanguage(language, pathsToScan, runInputData.minimumTokens, runInputData.skipDuplicateFiles); + LanguageSpecificRunData languageSpecificRunData = runInputData.runDataPerLanguage.get(language); + List pathsToScan = languageSpecificRunData.filesToScan.stream().map(Paths::get).collect(Collectors.toList()); + CpdLanguageRunResults languageRunResults = runLanguage( + language, pathsToScan, languageSpecificRunData.minimumTokens, runInputData.skipDuplicateFiles); if (!languageRunResults.matches.isEmpty() || !languageRunResults.processingErrors.isEmpty()) { results.put(language, languageRunResults); } @@ -126,12 +120,24 @@ private CpdLanguageRunResults runLanguage(String language, List pathsToSca } private void 
validateRunInputData(CpdRunInputData runInputData) { - if (runInputData.filesToScanPerLanguage == null) { - throw new RuntimeException("The \"filesToScanPerLanguage\" field was not set."); - } else if (runInputData.filesToScanPerLanguage.isEmpty()) { - throw new RuntimeException(("The \"filesToScanPerLanguage\" field was found to be empty.")); - } else if (runInputData.minimumTokens <= 0) { - throw new RuntimeException("The \"minimumTokens\" field was not set to a positive number."); + if (runInputData.runDataPerLanguage == null) { + throw new RuntimeException("The \"runDataPerLanguage\" field was not set."); + } + + Set> entries = runInputData.runDataPerLanguage.entrySet(); + if (entries.isEmpty()) { + throw new RuntimeException("The \"runDataPerLanguage\" field didn't have any languages listed."); + } + + for (Map.Entry entry: entries) { + String language = entry.getKey(); + LanguageSpecificRunData languageSpecificRunData = entry.getValue(); + + if (languageSpecificRunData.filesToScan == null || languageSpecificRunData.filesToScan.isEmpty()) { + throw new RuntimeException(("The \"filesToScan\" field was missing or empty for language: " + language)); + } else if (languageSpecificRunData.minimumTokens <= 0) { + throw new RuntimeException("The \"minimumTokens\" field was not set to a positive number for language: " + language); + } } } diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdWrapper.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdWrapper.java index 6481146d..683a56b9 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdWrapper.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/cpdwrapper/CpdWrapper.java @@ -17,12 +17,17 @@ * - {argsInputFile} is a JSON file containing the input arguments for the run command. 
* Example: * { - * "filesToScanPerLanguage": { - * "apex": ["/full/path/to/apex_file1.cls", "/full/path/to/apex_file2.trigger", ...], + * "runDataPerLanguage": { + * "apex": { + * "filesToScan": ["/full/path/to/apex_file1.cls", "/full/path/to/apex_file2.trigger", ...], + * "minimumTokens": 100 + * }, * ..., - * "xml": ["full/path/to/xml_file1.xml", "/full/path/to/xml_file2.xml", ...] + * "xml": { + * "filesToScan": ["/full/path/to/xml_file1.xml", "/full/path/to/xml_file2.xml", ...], + * "minimumTokens": 150 + * } * }, - * "minimumTokens": 100, * "skipDuplicateFiles": false * } * - {resultsOutputFile} is a JSON file to write CPD results to. diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/cpdwrapper/CpdWrapperTest.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/cpdwrapper/CpdWrapperTest.java index bd6ca004..3dac87c8 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/cpdwrapper/CpdWrapperTest.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/cpdwrapper/CpdWrapperTest.java @@ -99,9 +99,8 @@ void whenCallingMainWithRunAndInputFileThatDoesNotContainValidJson_thenError(@Te } @Test - void whenCallingRunWithMissingField_filesToScanPerLanguage_thenError(@TempDir Path tempDir) throws Exception { + void whenCallingRunWithMissingField_runDataPerLanguage_thenError(@TempDir Path tempDir) throws Exception { String inputFileContents = "{" + - " \"minimumTokens\": 100, " + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -109,15 +108,51 @@ void whenCallingRunWithMissingField_filesToScanPerLanguage_thenError(@TempDir Pa String[] args = {"run", inputFile, "/does/not/matter"}; RuntimeException thrown = assertThrows(RuntimeException.class, () -> callCpdWrapper(args)); assertThat(thrown.getMessage(), is( - "Error while attempting 
to invoke CpdRunner.run: The \"filesToScanPerLanguage\" field was not set.")); + "Error while attempting to invoke CpdRunner.run: The \"runDataPerLanguage\" field was not set.")); + } + + @Test + void whenCallingRunWithMissingField_filesToScan_thenError(@TempDir Path tempDir) throws Exception { + String inputFileContents = "{" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"minimumTokens\": 100 " + + " }" + + " }," + + " \"skipDuplicateFiles\": false " + + "}"; + String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); + + String[] args = {"run", inputFile, "/does/not/matter"}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callCpdWrapper(args)); + assertThat(thrown.getMessage(), is( + "Error while attempting to invoke CpdRunner.run: The \"filesToScan\" field was missing or empty for language: apex")); + } + + @Test + void whenCallingRunWithEmptyArrayFor_filesToScan_thenError(@TempDir Path tempDir) throws Exception { + String inputFileContents = "{" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": []," + + " \"minimumTokens\": 100 " + + " }" + + " }," + + " \"skipDuplicateFiles\": false " + + "}"; + String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); + + String[] args = {"run", inputFile, "/does/not/matter"}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callCpdWrapper(args)); + assertThat(thrown.getMessage(), is( + "Error while attempting to invoke CpdRunner.run: The \"filesToScan\" field was missing or empty for language: apex")); } @Test void whenCallingRunWithZeroLanguages_thenError(@TempDir Path tempDir) throws Exception { String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + + " \"runDataPerLanguage\": {" + " }," + - " \"minimumTokens\": 120," + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -125,17 +160,19 @@ void 
whenCallingRunWithZeroLanguages_thenError(@TempDir Path tempDir) throws Exc String[] args = {"run", inputFile, "/does/not/matter"}; RuntimeException thrown = assertThrows(RuntimeException.class, () -> callCpdWrapper(args)); assertThat(thrown.getMessage(), is( - "Error while attempting to invoke CpdRunner.run: The \"filesToScanPerLanguage\" field was found to be empty.")); + "Error while attempting to invoke CpdRunner.run: The \"runDataPerLanguage\" field didn't have any languages listed.")); } @Test void whenCallingRunWithInvalidLanguage_thenError(@TempDir Path tempDir) throws Exception { String dummyFile = createTempFile(tempDir, "dummy", ""); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"unknownLanguage\": [\"" + makePathJsonSafe(dummyFile) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"unknownLanguage\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(dummyFile) + "\"]," + + " \"minimumTokens\": 120" + + " }" + " }," + - " \"minimumTokens\": 120," + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -150,8 +187,10 @@ void whenCallingRunWithInvalidLanguage_thenError(@TempDir Path tempDir) throws E void whenCallingRunWithMissingField_minimumTokens_thenError(@TempDir Path tempDir) throws Exception { String dummyApexFile = createTempFile(tempDir, "dummy.cls", ""); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(dummyApexFile) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(dummyApexFile) + "\"]" + + " }" + " }," + " \"skipDuplicateFiles\": false " + "}"; @@ -160,17 +199,19 @@ void whenCallingRunWithMissingField_minimumTokens_thenError(@TempDir Path tempDi String[] args = {"run", inputFile, "/does/not/matter"}; RuntimeException thrown = assertThrows(RuntimeException.class, () -> callCpdWrapper(args)); assertThat(thrown.getMessage(), is( - 
"Error while attempting to invoke CpdRunner.run: The \"minimumTokens\" field was not set to a positive number.")); + "Error while attempting to invoke CpdRunner.run: The \"minimumTokens\" field was not set to a positive number for language: apex")); } @Test void whenCallingRunWithNegativeMinimumTokensValue_thenError(@TempDir Path tempDir) throws Exception { String dummyApexFile = createTempFile(tempDir, "dummy.cls", ""); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(dummyApexFile) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(dummyApexFile) + "\"]," + + " \"minimumTokens\": -1" + + " }" + " }," + - " \"minimumTokens\": -1," + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -178,17 +219,19 @@ void whenCallingRunWithNegativeMinimumTokensValue_thenError(@TempDir Path tempDi String[] args = {"run", inputFile, "/does/not/matter"}; RuntimeException thrown = assertThrows(RuntimeException.class, () -> callCpdWrapper(args)); assertThat(thrown.getMessage(), is( - "Error while attempting to invoke CpdRunner.run: The \"minimumTokens\" field was not set to a positive number.")); + "Error while attempting to invoke CpdRunner.run: The \"minimumTokens\" field was not set to a positive number for language: apex")); } @Test void whenCallingRunWithFileToScanThatDoesNotExist_thenExceptionIsForwardedAsProcessingErrorWithTerminatingExceptionMarker(@TempDir Path tempDir) throws Exception { String doesNotExist = tempDir.resolve("doesNotExist.cls").toAbsolutePath().toString(); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(doesNotExist) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(doesNotExist) + "\"]," + + " \"minimumTokens\": 100" + + " }" + " }," + - " 
\"minimumTokens\": 100," + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -212,11 +255,16 @@ void whenCallingRunWithValidFilesThatHaveDuplicates_thenJsonOutputShouldContainR String jsFile2 = createTempFile(tempDir, "jsFile2.js", SAMPLE_JS_2); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(apexFile1) + "\", \"" + makePathJsonSafe(apexFile2) + "\"]," + - " \"ecmascript\": [\"" + makePathJsonSafe(jsFile1) + "\", \"" + makePathJsonSafe(jsFile2) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(apexFile1) + "\", \"" + makePathJsonSafe(apexFile2) + "\"]," + + " \"minimumTokens\": 5" + + " }," + + " \"ecmascript\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(jsFile1) + "\", \"" + makePathJsonSafe(jsFile2) + "\"]," + + " \"minimumTokens\": 13" + + " }" + " }," + - " \"minimumTokens\": 5," + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -344,10 +392,12 @@ void whenCallingRunWithValidFilesHaveZeroDuplicatesSinceMinTokensIsHigh_thenJson String apexFile2 = createTempFile(tempDir, "ApexClass2.cls", SAMPLE_APEX_2); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(apexFile1) + "\", \"" + makePathJsonSafe(apexFile2) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(apexFile1) + "\", \"" + makePathJsonSafe(apexFile2) + "\"]," + + " \"minimumTokens\": 500" + + " }" + " }," + - " \"minimumTokens\": 500," + // This is why there are no dups found " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -370,11 +420,12 @@ void whenCallingRunWithTwoIdenticalFilesButSkipDuplicateFilesIsFalse_thenJsonOut String apexFileInSubFolder = 
createTempFile(subFolder, "ApexClass1.cls", SAMPLE_APEX_1); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(apexFileInParentFolder) + "\", \"" + makePathJsonSafe(apexFileInSubFolder) + "\"]," + - " \"xml\": []" + // Edge case - checking also that this doesn't blow up anything + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(apexFileInParentFolder) + "\", \"" + makePathJsonSafe(apexFileInSubFolder) + "\"]," + + " \"minimumTokens\": 15" + + " }" + " }," + - " \"minimumTokens\": 15," + " \"skipDuplicateFiles\": false " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); @@ -434,10 +485,12 @@ void whenCallingRunWithTwoIdenticalFilesButSkipDuplicateFilesIsTrue_thenJsonOutp String apexFileInSubFolder = createTempFile(subFolder, "ApexClass1.cls", SAMPLE_APEX_1); String inputFileContents = "{" + - " \"filesToScanPerLanguage\": {" + - " \"apex\": [\"" + makePathJsonSafe(apexFileInParentFolder) + "\", \"" + makePathJsonSafe(apexFileInSubFolder) + "\"]" + + " \"runDataPerLanguage\": {" + + " \"apex\": {" + + " \"filesToScan\": [\"" + makePathJsonSafe(apexFileInParentFolder) + "\", \"" + makePathJsonSafe(apexFileInSubFolder) + "\"]," + + " \"minimumTokens\": 15" + + " }" + " }," + - " \"minimumTokens\": 15," + " \"skipDuplicateFiles\": true " + "}"; String inputFile = createTempFile(tempDir, "inputFile.json", inputFileContents); diff --git a/packages/code-analyzer-pmd-engine/src/config.ts b/packages/code-analyzer-pmd-engine/src/config.ts index c59366c7..ff723530 100644 --- a/packages/code-analyzer-pmd-engine/src/config.ts +++ b/packages/code-analyzer-pmd-engine/src/config.ts @@ -86,20 +86,27 @@ export type CpdEngineConfig = { // The languages that you may choose from are: 'apex', 'html', 'javascript' (or 'ecmascript'), 'typescript', 'visualforce', 'xml' rule_languages: string[] - // The minimum number of tokens required to be in a 
duplicate block of code in order to be reported as a violation. - // The concept of a token may be defined differently per language, but in general it a distinct basic element of source code. - // For example, this could be language specific keywords, identifiers, operators, literals, and more. - // See https://docs.pmd-code.org/latest/pmd_userdocs_cpd.html to learn more. - minimum_tokens: number + // Specifies the minimum tokens threshold for each rule language. + // The minimum tokens threshold is the number of tokens required to be in a duplicate block of code in order to be + // reported as a violation. The concept of a token may be defined differently per language, but in general it is a + // distinct basic element of source code. For example, this could be language specific keywords, identifiers, + // operators, literals, and more. See https://docs.pmd-code.org/latest/pmd_userdocs_cpd.html to learn more. + // If a value for a language is unspecified, then the default value of 100 will be used for that language. + minimum_tokens: Record // Indicates whether to ignore multiple copies of files of the same name and length. 
skip_duplicate_files: boolean } +const DEFAULT_MINIMUM_TOKENS: number = 100; + export const DEFAULT_CPD_ENGINE_CONFIG: CpdEngineConfig = { java_command: DEFAULT_JAVA_COMMAND, rule_languages: CPD_AVAILABLE_LANGUAGES, // hidden - minimum_tokens: 100, + minimum_tokens: CPD_AVAILABLE_LANGUAGES.reduce((obj, lang: string) => { + obj[lang] = DEFAULT_MINIMUM_TOKENS; + return obj; + }, {} as Record), skip_duplicate_files: false } @@ -117,7 +124,7 @@ export const CPD_ENGINE_CONFIG_DESCRIPTION: ConfigDescription = { minimum_tokens: { descriptionText: getMessage('CpdConfigFieldDescription_minimum_tokens'), - valueType: "number", + valueType: "object", defaultValue: DEFAULT_CPD_ENGINE_CONFIG.minimum_tokens, }, skip_duplicate_files: { @@ -328,12 +335,34 @@ class CpdConfigValueExtractor extends SharedConfigValueExtractor { return DEFAULT_CPD_ENGINE_CONFIG.rule_languages; } - extractMinimumTokens(): number { - const minimumTokens: number = this.configValueExtractor.extractNumber('minimum_tokens', DEFAULT_CPD_ENGINE_CONFIG.minimum_tokens)!; - if (minimumTokens <= 0 || Math.floor(minimumTokens) != minimumTokens) { - throw new Error(getMessage('InvalidPositiveInteger', this.configValueExtractor.getFieldPath('minimum_tokens'))); + extractMinimumTokens(): Record { + const minimumTokensExtractor: ConfigValueExtractor = this.configValueExtractor.extractObjectAsExtractor( + 'minimum_tokens', DEFAULT_CPD_ENGINE_CONFIG.minimum_tokens); + + // Start with a copy with all the default values + const minimumTokensMap: Record = {...DEFAULT_CPD_ENGINE_CONFIG.minimum_tokens}; + + // And override the default values with user provided values for each language found + for (const key of minimumTokensExtractor.getKeys()) { + let language: string = key.toLowerCase(); + if (language === 'ecmascript') { + // Provide support for 'ecmascript' which is a supported alias of 'javascript' + language = 'javascript'; + } + + if (!CPD_AVAILABLE_LANGUAGES.includes(language)) { + throw new 
Error(getMessage('InvalidFieldKeyForObject', + this.configValueExtractor.getFieldPath('minimum_tokens'), key, toAvailableLanguagesText(CPD_AVAILABLE_LANGUAGES))); + } + + const minimumTokensValue: number = minimumTokensExtractor.extractRequiredNumber(key); + if (minimumTokensValue <= 0 || Math.floor(minimumTokensValue) != minimumTokensValue) { + throw new Error(getMessage('InvalidPositiveInteger', minimumTokensExtractor.getFieldPath(key))); + } + minimumTokensMap[language] = minimumTokensValue; } - return minimumTokens; + + return minimumTokensMap; } extractSkipDuplicateFiles(): boolean { diff --git a/packages/code-analyzer-pmd-engine/src/cpd-engine.ts b/packages/code-analyzer-pmd-engine/src/cpd-engine.ts index f0fe8889..01c617c7 100644 --- a/packages/code-analyzer-pmd-engine/src/cpd-engine.ts +++ b/packages/code-analyzer-pmd-engine/src/cpd-engine.ts @@ -18,7 +18,6 @@ import {CPD_AVAILABLE_LANGUAGES, CpdEngineConfig} from "./config"; import { CpdBlockLocation, CpdLanguageRunResults, - CpdLanguageToFilesMap, CpdMatch, CpdRunInputData, CpdRunResults, @@ -67,24 +66,27 @@ export class CpdEngine extends Engine { const relevantLanguageToFilesMap: Map = await workspaceLiaison.getRelevantLanguageToFilesMap(); this.emitRunRulesProgressEvent(2); - const filesToScanPerLanguage: CpdLanguageToFilesMap = {}; - for (const languageId of ruleNames.map(getLanguageFromRuleName)) { - if (relevantLanguageToFilesMap.has(languageId)) { - // Calling toCpdLanguage is needed to convert the LanguageId to the identifier that CPD recognizes - filesToScanPerLanguage[toCpdLanguage(languageId)] = relevantLanguageToFilesMap.get(languageId)!; + const inputData: CpdRunInputData = { + runDataPerLanguage: {}, + skipDuplicateFiles: this.config.skip_duplicate_files + } + + const relevantLanguages: Set = new Set(ruleNames.map(getLanguageFromRuleName)); + for (const languageId of relevantLanguages) { + const filesToScanForLanguage: string[] = relevantLanguageToFilesMap.get(languageId) || []; + if 
(filesToScanForLanguage.length > 0) { + inputData.runDataPerLanguage[toCpdLanguage(languageId)] = { + filesToScan: filesToScanForLanguage, + minimumTokens: this.config.minimum_tokens[languageId] + } } } - if (Object.keys(filesToScanPerLanguage).length == 0) { + if (Object.keys(inputData.runDataPerLanguage).length === 0) { this.emitRunRulesProgressEvent(100); return { violations: [] }; } - const inputData: CpdRunInputData = { - filesToScanPerLanguage: filesToScanPerLanguage, - minimumTokens: this.config.minimum_tokens, - skipDuplicateFiles: this.config.skip_duplicate_files - } this.emitRunRulesProgressEvent(5); const cpdRunResults: CpdRunResults = await this.cpdWrapperInvoker.invokeRunCommand(inputData, diff --git a/packages/code-analyzer-pmd-engine/src/cpd-wrapper.ts b/packages/code-analyzer-pmd-engine/src/cpd-wrapper.ts index 975f6fe3..eb1b1f17 100644 --- a/packages/code-analyzer-pmd-engine/src/cpd-wrapper.ts +++ b/packages/code-analyzer-pmd-engine/src/cpd-wrapper.ts @@ -10,12 +10,12 @@ const CPD_WRAPPER_LIB_FOLDER: string = path.resolve(__dirname, '..', 'dist', 'pm const STDOUT_PROGRESS_MARKER = '[Progress]'; export type CpdRunInputData = { - filesToScanPerLanguage: CpdLanguageToFilesMap, - minimumTokens: number, + runDataPerLanguage: Record, skipDuplicateFiles: boolean } -export type CpdLanguageToFilesMap = { // JSON.stringify doesn't support maps, so we can't just use Map - [language: string]: string[] +export type LanguageSpecificCpdRunData = { + filesToScan: string[], + minimumTokens: number } export type CpdRunResults = { diff --git a/packages/code-analyzer-pmd-engine/src/messages.ts b/packages/code-analyzer-pmd-engine/src/messages.ts index 2b520bc6..970c8d73 100644 --- a/packages/code-analyzer-pmd-engine/src/messages.ts +++ b/packages/code-analyzer-pmd-engine/src/messages.ts @@ -30,10 +30,12 @@ const MESSAGE_CATALOG : { [key: string]: string } = { `To learn more about this configuration, visit: __LINK_COMING_SOON__`, 
CpdConfigFieldDescription_minimum_tokens: - `The minimum number of tokens required to be in a duplicate block of code in order to be reported as a violation.\n` + - `The concept of a token may be defined differently per language, but in general it a distinct basic element of source code.\n` + - `For example, this could be language specific keywords, identifiers, operators, literals, and more.\n` + - `See https://docs.pmd-code.org/latest/pmd_userdocs_cpd.html to learn more.`, + `Specifies the minimum tokens threshold for each rule language.\n` + + `The minimum tokens threshold is the number of tokens required to be in a duplicate block of code in order to be\n` + + `reported as a violation. The concept of a token may be defined differently per language, but in general it is a\n` + + `distinct basic element of source code. For example, this could be language specific keywords, identifiers,\n` + + `operators, literals, and more. See https://docs.pmd-code.org/latest/pmd_userdocs_cpd.html to learn more.\n` + + `If a value for a language is unspecified, then the default value of 100 will be used for that language.`, CpdConfigFieldDescription_skip_duplicate_files: `Indicates whether to ignore multiple copies of files of the same name and length.`, @@ -88,6 +90,9 @@ const MESSAGE_CATALOG : { [key: string]: string } = { InvalidPositiveInteger: `The '%s' configuration value is invalid. The value must be a positive integer.`, + + InvalidFieldKeyForObject: + `The '%s' configure value is invalid. The value contained an invalid key '%s'. 
Valid keys for this object are: %s` } /** diff --git a/packages/code-analyzer-pmd-engine/test/cpd-engine.test.ts b/packages/code-analyzer-pmd-engine/test/cpd-engine.test.ts index 0870ba8c..70433540 100644 --- a/packages/code-analyzer-pmd-engine/test/cpd-engine.test.ts +++ b/packages/code-analyzer-pmd-engine/test/cpd-engine.test.ts @@ -225,7 +225,9 @@ describe('Tests for the runRules method of CpdEngine', () => { it('When specifying a minimum_tokens length that is small enough to pick up smaller code blocks, then violations are returned', async () => { const engine: CpdEngine = new CpdEngine({ ...DEFAULT_CPD_ENGINE_CONFIG, - minimum_tokens: 10 + minimum_tokens: { + javascript: 10 + } }); const progressEvents: RunRulesProgressEvent[] = []; engine.onEvent(EventType.RunRulesProgressEvent, (e: RunRulesProgressEvent) => progressEvents.push(e)); diff --git a/packages/code-analyzer-pmd-engine/test/plugin.test.ts b/packages/code-analyzer-pmd-engine/test/plugin.test.ts index bd4a2325..2ce3b756 100644 --- a/packages/code-analyzer-pmd-engine/test/plugin.test.ts +++ b/packages/code-analyzer-pmd-engine/test/plugin.test.ts @@ -53,7 +53,14 @@ describe('Tests for the PmdCpdEnginesPlugin', () => { expect(resolvedConfig).toEqual({ java_command: resolvedConfig.java_command, // Already checked that it ends with 'java' rule_languages: ['apex', 'html', 'javascript', 'typescript', 'visualforce', 'xml'], - minimum_tokens: 100, + minimum_tokens: { + "apex": 100, + "html": 100, + "javascript": 100, + "typescript": 100, + "visualforce": 100, + "xml": 100 + }, skip_duplicate_files: false }); }); @@ -291,25 +298,47 @@ describe('Tests for the PmdCpdEnginesPlugin', () => { ]); }); - it(`When createEngineConfig for 'cpd' is given a minimum_tokens value that is not a number, then error`, async () => { + it(`When createEngineConfig for 'cpd' is given a minimum_tokens value that is not an object, then error`, async () => { const rawConfig: ConfigObject = {minimum_tokens: 'oops'}; const 
configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd'); await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow( - getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ConfigValueMustBeOfType', 'engines.cpd.minimum_tokens', 'number', 'string')); + getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ConfigValueMustBeOfType', 'engines.cpd.minimum_tokens', 'object', 'string')); }); - it.each([-5,0,2.5])(`When createEngineConfig for 'cpd' is given a minumum_tokens number %s that is not a positive integer, then error`, async (invalidValue) => { - const rawConfig: ConfigObject = {minimum_tokens: invalidValue}; + it(`When createEngineConfig for 'cpd' is given a minimum_tokens.apex value that is not a number, then error`, async () => { + const rawConfig: ConfigObject = {minimum_tokens: {apex: 'oops'}}; const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd'); await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow( - getMessage('InvalidPositiveInteger', 'engines.cpd.minimum_tokens')); + getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ConfigValueMustBeOfType', 'engines.cpd.minimum_tokens.apex', 'number', 'string')); + }); + + it(`When createEngineConfig for 'cpd' is given a minimum_tokens with an invalid language, then error`, async () => { + const rawConfig: ConfigObject = {minimum_tokens: {apex: 100, oops: 100}}; + const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd'); + await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow( + getMessage('InvalidFieldKeyForObject', 'engines.cpd.minimum_tokens', 'oops', + "'apex', 'html', 'javascript' (or 'ecmascript'), 'typescript', 'visualforce', 'xml'")); + }); + + it.each([-5,0,2.5])(`When createEngineConfig for 'cpd' is given a minumum_tokens.xml number %s that is not a positive integer, then error`, async (invalidValue) => { + 
const rawConfig: ConfigObject = {minimum_tokens: {apex: 10, xml: invalidValue}}; + const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd'); + await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow( + getMessage('InvalidPositiveInteger', 'engines.cpd.minimum_tokens.xml')); }); it(`When createEngineConfig for 'cpd' is given a valid minimum_tokens value, then it is used`, async() => { - const rawConfig: ConfigObject = {minimum_tokens: 18}; + const rawConfig: ConfigObject = {minimum_tokens: {apex: 18, xml: 30, ecmascript: 25}}; // Also test that ecmascript can be used as an alias of javascript const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd'); const resolvedConfig: ConfigObject = await plugin.createEngineConfig('cpd', configValueExtractor); - expect(resolvedConfig['minimum_tokens']).toEqual(18); + expect(resolvedConfig['minimum_tokens']).toEqual({ + apex: 18, + html: 100, + javascript: 25, + typescript: 100, + visualforce: 100, + xml: 30 + }); }); it(`When createEngineConfig for 'cpd' is given a skip_duplicate_files value that is not a boolean, then error`, async () => {