Skip to content

Commit fb07020

Browse files
NEW(cpd): @W-16866916@: Add minimum_tokens and skip_duplicate_files to cpd engine config (#126)
1 parent 4c244eb commit fb07020

File tree

5 files changed

+182
-16
lines changed

5 files changed

+182
-16
lines changed

packages/code-analyzer-pmd-engine/src/config.ts

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ export const CPD_ENGINE_CONFIG_DESCRIPTION: ConfigDescription = {
2727
fieldDescriptions: {
2828
java_command: getMessage('SharedConfigFieldDescription_java_command', CPD_ENGINE_NAME),
2929
rule_languages: getMessage('CpdConfigFieldDescription_rule_languages', toAvailableLanguagesText(CPD_AVAILABLE_LANGUAGES)),
30+
minimum_tokens: getMessage('CpdConfigFieldDescription_minimum_tokens'),
31+
skip_duplicate_files: getMessage('CpdConfigFieldDescription_skip_duplicate_files')
3032
}
3133
}
3234

@@ -75,11 +77,22 @@ export type CpdEngineConfig = {
7577
// List of languages associated with CPD to be made available for 'cpd' engine rule selection.
7678
// The languages that you may choose from are: 'apex', 'html', 'javascript' (or 'ecmascript'), 'typescript', 'visualforce', 'xml'
7779
rule_languages: string[]
80+
81+
// The minimum number of tokens required to be in a duplicate block of code in order to be reported as a violation.
82+
// The concept of a token may be defined differently per language, but in general it a distinct basic element of source code.
83+
// For example, this could be language specific keywords, identifiers, operators, literals, and more.
84+
// See https://docs.pmd-code.org/latest/pmd_userdocs_cpd.html to learn more.
85+
minimum_tokens: number
86+
87+
// Indicates whether to ignore multiple copies of files of the same name and length.
88+
skip_duplicate_files: boolean
7889
}
7990

8091
export const DEFAULT_CPD_ENGINE_CONFIG: CpdEngineConfig = {
8192
java_command: DEFAULT_JAVA_COMMAND,
82-
rule_languages: ['apex', 'html', 'javascript', 'typescript', 'visualforce', 'xml']
93+
rule_languages: ['apex', 'html', 'javascript', 'typescript', 'visualforce', 'xml'],
94+
minimum_tokens: 100,
95+
skip_duplicate_files: false
8396
}
8497

8598
export async function validateAndNormalizePmdConfig(configValueExtractor: ConfigValueExtractor,
@@ -103,7 +116,9 @@ export async function validateAndNormalizeCpdConfig(configValueExtractor: Config
103116

104117
return {
105118
java_command: await cpdConfigValueExtractor.extractJavaCommand(),
106-
rule_languages: cpdConfigValueExtractor.extractRuleLanguages()
119+
rule_languages: cpdConfigValueExtractor.extractRuleLanguages(),
120+
minimum_tokens: cpdConfigValueExtractor.extractMinimumTokens(),
121+
skip_duplicate_files: cpdConfigValueExtractor.extractSkipDuplicateFiles()
107122
}
108123
}
109124

@@ -278,6 +293,18 @@ class CpdConfigValueExtractor extends SharedConfigValueExtractor {
278293
protected getDefaultRuleLanguages(): string[] {
279294
return DEFAULT_CPD_ENGINE_CONFIG.rule_languages;
280295
}
296+
297+
extractMinimumTokens(): number {
298+
const minimumTokens: number = this.configValueExtractor.extractNumber('minimum_tokens', DEFAULT_CPD_ENGINE_CONFIG.minimum_tokens)!;
299+
if (minimumTokens <= 0 || Math.floor(minimumTokens) != minimumTokens) {
300+
throw new Error(getMessage('InvalidPositiveInteger', this.configValueExtractor.getFieldPath('minimum_tokens')));
301+
}
302+
return minimumTokens;
303+
}
304+
305+
extractSkipDuplicateFiles(): boolean {
306+
return this.configValueExtractor.extractBoolean('skip_duplicate_files', DEFAULT_CPD_ENGINE_CONFIG.skip_duplicate_files)!;
307+
}
281308
}
282309

283310
function toAvailableLanguagesText(languages: string[]): string {

packages/code-analyzer-pmd-engine/src/cpd-engine.ts

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ const RULE_NAME_PREFIX: string = 'DetectCopyPasteFor';
2929

3030
export class CpdEngine extends Engine {
3131
private readonly cpdWrapperInvoker: CpdWrapperInvoker;
32-
private readonly selectedLanguages: LanguageId[];
33-
private readonly minimumTokens: number;
34-
private readonly skipDuplicateFiles: boolean;
32+
private readonly config: CpdEngineConfig;
3533

3634
private workspaceLiaisonCache: Map<string, WorkspaceLiaison> = new Map();
3735

@@ -40,10 +38,7 @@ export class CpdEngine extends Engine {
4038
const javaCommandExecutor: JavaCommandExecutor = new JavaCommandExecutor(config.java_command);
4139
this.cpdWrapperInvoker = new CpdWrapperInvoker(javaCommandExecutor,
4240
(logLevel: LogLevel, message: string) => this.emitLogEvent(logLevel, message));
43-
44-
this.selectedLanguages = config.rule_languages as LanguageId[];
45-
this.minimumTokens = 100; // Will be configurable soon
46-
this.skipDuplicateFiles = false; // Will be configurable soon
41+
this.config = config;
4742
}
4843

4944
getName(): string {
@@ -79,8 +74,8 @@ export class CpdEngine extends Engine {
7974

8075
const inputData: CpdRunInputData = {
8176
filesToScanPerLanguage: filesToScanPerLanguage,
82-
minimumTokens: this.minimumTokens,
83-
skipDuplicateFiles: this.skipDuplicateFiles
77+
minimumTokens: this.config.minimum_tokens,
78+
skipDuplicateFiles: this.config.skip_duplicate_files
8479
}
8580
this.emitRunRulesProgressEvent(5);
8681

@@ -115,7 +110,7 @@ export class CpdEngine extends Engine {
115110
private getWorkspaceLiaison(workspace?: Workspace) : WorkspaceLiaison {
116111
const cacheKey: string = getCacheKey(workspace);
117112
if (!this.workspaceLiaisonCache.has(cacheKey)) {
118-
this.workspaceLiaisonCache.set(cacheKey, new WorkspaceLiaison(workspace, this.selectedLanguages));
113+
this.workspaceLiaisonCache.set(cacheKey, new WorkspaceLiaison(workspace, this.config.rule_languages as LanguageId[]));
119114
}
120115
return this.workspaceLiaisonCache.get(cacheKey)!
121116
}

packages/code-analyzer-pmd-engine/src/messages.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ const MESSAGE_CATALOG : { [key: string]: string } = {
3838
`List of languages associated with CPD to be made available for 'cpd' engine rule selection.\n` +
3939
`The languages that you may choose from are: %s.`,
4040

41+
CpdConfigFieldDescription_minimum_tokens:
42+
`The minimum number of tokens required to be in a duplicate block of code in order to be reported as a violation.\n` +
43+
`The concept of a token may be defined differently per language, but in general it a distinct basic element of source code.\n` +
44+
`For example, this could be language specific keywords, identifiers, operators, literals, and more.\n` +
45+
`See https://docs.pmd-code.org/latest/pmd_userdocs_cpd.html to learn more.`,
46+
47+
CpdConfigFieldDescription_skip_duplicate_files:
48+
`Indicates whether to ignore multiple copies of files of the same name and length.`,
49+
4150
UnsupportedEngineName:
4251
`The PmdCpdEnginesPlugin does not support an engine with name '%s'.`,
4352

@@ -84,7 +93,10 @@ const MESSAGE_CATALOG : { [key: string]: string } = {
8493
`Identify duplicate code blocks within your workspace files associated with the '%s' language.`,
8594

8695
DetectCopyPasteForLanguageViolationMessage:
87-
`Duplicate code detected for language '%s'. Found %d code locations containing the same block of code consisting of %d tokens across %d lines.`
96+
`Duplicate code detected for language '%s'. Found %d code locations containing the same block of code consisting of %d tokens across %d lines.`,
97+
98+
InvalidPositiveInteger:
99+
`The '%s' configuration value is invalid. The value must be a positive integer.`,
88100
}
89101

90102
/**

packages/code-analyzer-pmd-engine/test/cpd-engine.test.ts

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ async function expectRulesToMatchGoldFile(actualRuleDescriptions: RuleDescriptio
9696
describe('Tests for the runRules method of CpdEngine', () => {
9797
it('When zero rules names are provided then return zero violations', async () => {
9898
const engine: CpdEngine = new CpdEngine(DEFAULT_CPD_ENGINE_CONFIG);
99-
expect(await engine.runRules([],{workspace: new Workspace([__dirname])})).toEqual({violations: []});
99+
expect(await engine.runRules([], {workspace: new Workspace([__dirname])})).toEqual({violations: []});
100100
});
101101

102102
it('When rule name is not associated with a language that CPD knows about, then throw error', async () => {
@@ -214,8 +214,103 @@ describe('Tests for the runRules method of CpdEngine', () => {
214214
expect(results.violations).toContainEqual(expViolation2);
215215
expect(results.violations).toContainEqual(expViolation3);
216216

217+
// Notice how with the default 100 minimum_tokens that the sampleJavascript1_ItselfContainsDuplicateBlocksButWithVeryFewTokens
218+
// file doesn't get picked up even though we specified the DetectCopyPasteForJavascript rule. See the next test.
219+
217220
// Also check that we have all the correct progress events
218221
expect(progressEvents.map(e => e.percentComplete)).toEqual([2, 5, 9.65, 14.3, 17.4, 20.5,
219222
26.7, 32.9, 39.1, 42.2, 45.3, 51.5, 57.7, 63.9, 67, 70.1, 76.3, 82.5, 88.7, 93.35, 98, 100]);
220223
});
224+
225+
it('When specifying a minimum_tokens length that is small enough to pick up smaller code blocks, then violations are returned', async () => {
226+
const engine: CpdEngine = new CpdEngine({
227+
...DEFAULT_CPD_ENGINE_CONFIG,
228+
minimum_tokens: 10
229+
});
230+
const progressEvents: RunRulesProgressEvent[] = [];
231+
engine.onEvent(EventType.RunRulesProgressEvent, (e: RunRulesProgressEvent) => progressEvents.push(e));
232+
233+
const workspace: Workspace = new Workspace([path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace')]);
234+
const ruleNames: string[] = ['DetectCopyPasteForJavascript'];
235+
236+
const results: EngineRunResults = await engine.runRules(ruleNames, {workspace: workspace});
237+
238+
const expViolation1: Violation = {
239+
ruleName: "DetectCopyPasteForJavascript",
240+
message: "Duplicate code detected for language 'javascript'. Found 2 code locations containing the same block of code consisting of 36 tokens across 10 lines.",
241+
primaryLocationIndex: 0,
242+
codeLocations: [
243+
{
244+
file: path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace', 'sampleJavascript1_ItselfContainsDuplicateBlocksButWithVeryFewTokens.js'),
245+
startLine: 1,
246+
startColumn: 14,
247+
endLine: 10,
248+
endColumn: 2
249+
},
250+
{
251+
file: path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace', 'sampleJavascript2_ContainsNearlyAllTheSameTokensAsSampleJavascript1.js'),
252+
startLine: 1,
253+
startColumn: 14,
254+
endLine: 10,
255+
endColumn: 2
256+
}
257+
]
258+
};
259+
260+
const expViolation2: Violation = {
261+
ruleName: "DetectCopyPasteForJavascript",
262+
message: "Duplicate code detected for language 'javascript'. Found 4 code locations containing the same block of code consisting of 13 tokens across 4 lines.",
263+
primaryLocationIndex: 0,
264+
codeLocations: [
265+
{
266+
file: path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace', 'sampleJavascript1_ItselfContainsDuplicateBlocksButWithVeryFewTokens.js'),
267+
startLine: 1,
268+
startColumn: 15,
269+
endLine: 4,
270+
endColumn: 2
271+
},
272+
{
273+
file: path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace', 'sampleJavascript1_ItselfContainsDuplicateBlocksButWithVeryFewTokens.js'),
274+
startLine: 6,
275+
startColumn: 10,
276+
endLine: 9,
277+
endColumn: 4
278+
},
279+
{
280+
file: path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace', 'sampleJavascript2_ContainsNearlyAllTheSameTokensAsSampleJavascript1.js'),
281+
startLine: 1,
282+
startColumn: 15,
283+
endLine: 4,
284+
endColumn: 2
285+
},
286+
{
287+
file: path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace', 'sampleJavascript2_ContainsNearlyAllTheSameTokensAsSampleJavascript1.js'),
288+
startLine: 6,
289+
startColumn: 10,
290+
endLine: 9,
291+
endColumn: 4
292+
}
293+
]
294+
};
295+
296+
expect(results.violations).toHaveLength(2);
297+
expect(results.violations).toContainEqual(expViolation1);
298+
expect(results.violations).toContainEqual(expViolation2);
299+
});
300+
301+
it('When skipping duplicate files, then results should not include duplicates from files of same name and length', async () => {
302+
const engine: CpdEngine = new CpdEngine({
303+
... DEFAULT_CPD_ENGINE_CONFIG,
304+
skip_duplicate_files: true
305+
});
306+
const progressEvents: RunRulesProgressEvent[] = [];
307+
engine.onEvent(EventType.RunRulesProgressEvent, (e: RunRulesProgressEvent) => progressEvents.push(e));
308+
309+
const workspace: Workspace = new Workspace([path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace')]);
310+
const ruleNames: string[] = ['DetectCopyPasteForHtml'];
311+
312+
const results: EngineRunResults = await engine.runRules(ruleNames, {workspace: workspace});
313+
314+
expect(results.violations).toHaveLength(0); // Should not pick up the someReplicatedFileWithOver100Tokens.html files
315+
});
221316
});

packages/code-analyzer-pmd-engine/test/plugin.test.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ describe('Tests for the PmdCpdEnginesPlugin', () => {
5757
expect(resolvedConfig.java_command.endsWith('java')).toEqual(true);
5858
expect(resolvedConfig).toEqual({
5959
java_command: resolvedConfig.java_command, // Already checked that it ends with 'java'
60-
rule_languages: ['apex', 'html', 'javascript', 'typescript', 'visualforce', 'xml']
60+
rule_languages: ['apex', 'html', 'javascript', 'typescript', 'visualforce', 'xml'],
61+
minimum_tokens: 100,
62+
skip_duplicate_files: false
6163
});
6264
});
6365

@@ -271,7 +273,7 @@ describe('Tests for the PmdCpdEnginesPlugin', () => {
271273
getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ConfigValueMustBeOfType', 'engines.pmd.custom_rulesets[2]', 'string', 'object'));
272274
});
273275

274-
it(`When createEngineConfig is given valid custom ruleset values, then make sure they are in resolved config`, async() => {
276+
it(`When createEngineConfig for 'pmd' is given valid custom ruleset values, then make sure they are in resolved config`, async () => {
275277
const rawConfig: ConfigObject = {
276278
java_classpath_entries: [
277279
path.join('test-data','custom rules','category_joshapex_somecat2.jar'),
@@ -294,6 +296,41 @@ describe('Tests for the PmdCpdEnginesPlugin', () => {
294296
]);
295297
});
296298

299+
it(`When createEngineConfig for 'cpd' is given a minimum_tokens value that is not a number, then error`, async () => {
300+
const rawConfig: ConfigObject = {minimum_tokens: 'oops'};
301+
const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd');
302+
await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow(
303+
getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ConfigValueMustBeOfType', 'engines.cpd.minimum_tokens', 'number', 'string'));
304+
});
305+
306+
it.each([-5,0,2.5])(`When createEngineConfig for 'cpd' is given a minumum_tokens number %s that is not a positive integer, then error`, async (invalidValue) => {
307+
const rawConfig: ConfigObject = {minimum_tokens: invalidValue};
308+
const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd');
309+
await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow(
310+
getMessage('InvalidPositiveInteger', 'engines.cpd.minimum_tokens'));
311+
});
312+
313+
it(`When createEngineConfig for 'cpd' is given a valid minimum_tokens value, then it is used`, async() => {
314+
const rawConfig: ConfigObject = {minimum_tokens: 18};
315+
const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd');
316+
const resolvedConfig: ConfigObject = await plugin.createEngineConfig('cpd', configValueExtractor);
317+
expect(resolvedConfig['minimum_tokens']).toEqual(18);
318+
});
319+
320+
it(`When createEngineConfig for 'cpd' is given a skip_duplicate_files value that is not a boolean, then error`, async () => {
321+
const rawConfig: ConfigObject = {skip_duplicate_files: 3};
322+
const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd');
323+
await expect(plugin.createEngineConfig('cpd', configValueExtractor)).rejects.toThrow(
324+
getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ConfigValueMustBeOfType', 'engines.cpd.skip_duplicate_files', 'boolean', 'number'));
325+
});
326+
327+
it(`When createEngineConfig for 'cpd' is given a valid skip_duplicate_files value, then it is used`, async() => {
328+
const rawConfig: ConfigObject = {skip_duplicate_files: true};
329+
const configValueExtractor: ConfigValueExtractor = new ConfigValueExtractor(rawConfig, 'engines.cpd');
330+
const resolvedConfig: ConfigObject = await plugin.createEngineConfig('cpd', configValueExtractor);
331+
expect(resolvedConfig['skip_duplicate_files']).toEqual(true);
332+
});
333+
297334
it('When createEngineConfig is called with an unsupported engine name, then an error is thrown', async () => {
298335
await expect(plugin.createEngineConfig('oops', new ConfigValueExtractor({}))).rejects.toThrow(
299336
getMessage('UnsupportedEngineName', 'oops'));

0 commit comments

Comments
 (0)