@@ -96,7 +96,7 @@ async function expectRulesToMatchGoldFile(actualRuleDescriptions: RuleDescriptio
9696describe ( 'Tests for the runRules method of CpdEngine' , ( ) => {
// Passing an empty list of rule names is expected to produce a result whose
// violations array is empty.
it('When zero rules names are provided then return zero violations', async () => {
    const engine: CpdEngine = new CpdEngine(DEFAULT_CPD_ENGINE_CONFIG);

    // Run once and assert once: the diff residue carried both the removed and the
    // added copy of this same assertion.
    const results: EngineRunResults = await engine.runRules([], { workspace: new Workspace([__dirname]) });

    expect(results).toEqual({ violations: [] });
});
101101
102102 it ( 'When rule name is not associated with a language that CPD knows about, then throw error' , async ( ) => {
@@ -214,8 +214,103 @@ describe('Tests for the runRules method of CpdEngine', () => {
214214 expect ( results . violations ) . toContainEqual ( expViolation2 ) ;
215215 expect ( results . violations ) . toContainEqual ( expViolation3 ) ;
216216
217+ // Notice that with the default minimum_tokens value of 100, the sampleJavascript1_ItselfContainsDuplicateBlocksButWithVeryFewTokens
218+ // file doesn't get picked up, even though we specified the DetectCopyPasteForJavascript rule. See the next test.
219+
217220 // Also check that we have all the correct progress events
218221 expect ( progressEvents . map ( e => e . percentComplete ) ) . toEqual ( [ 2 , 5 , 9.65 , 14.3 , 17.4 , 20.5 ,
219222 26.7 , 32.9 , 39.1 , 42.2 , 45.3 , 51.5 , 57.7 , 63.9 , 67 , 70.1 , 76.3 , 82.5 , 88.7 , 93.35 , 98 , 100 ] ) ;
220223 } ) ;
224+
// Overrides minimum_tokens with 10 and runs only the JavaScript copy/paste rule over
// the sample workspace. The two small sample JavaScript files are then expected to be
// reported: once as a 36-token block shared across both files, and once as a 13-token
// block that appears twice inside each file.
it('When specifying a minimum_tokens length that is small enough to pick up smaller code blocks, then violations are returned', async () => {
    const engine: CpdEngine = new CpdEngine({
        ...DEFAULT_CPD_ENGINE_CONFIG,
        minimum_tokens: 10
    });

    const workspaceFolder: string = path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace');
    const workspace: Workspace = new Workspace([workspaceFolder]);
    const ruleNames: string[] = ['DetectCopyPasteForJavascript'];

    const results: EngineRunResults = await engine.runRules(ruleNames, { workspace });

    // Both expected violations refer to the same two sample files, so resolve their paths once.
    const jsFile1: string = path.join(workspaceFolder, 'sampleJavascript1_ItselfContainsDuplicateBlocksButWithVeryFewTokens.js');
    const jsFile2: string = path.join(workspaceFolder, 'sampleJavascript2_ContainsNearlyAllTheSameTokensAsSampleJavascript1.js');

    // The larger block: lines 1-10 of each file.
    const expViolation1: Violation = {
        ruleName: "DetectCopyPasteForJavascript",
        message: "Duplicate code detected for language 'javascript'. Found 2 code locations containing the same block of code consisting of 36 tokens across 10 lines.",
        primaryLocationIndex: 0,
        codeLocations: [
            { file: jsFile1, startLine: 1, startColumn: 14, endLine: 10, endColumn: 2 },
            { file: jsFile2, startLine: 1, startColumn: 14, endLine: 10, endColumn: 2 }
        ]
    };

    // The smaller block: occurs at lines 1-4 and again at lines 6-9 within each file.
    const expViolation2: Violation = {
        ruleName: "DetectCopyPasteForJavascript",
        message: "Duplicate code detected for language 'javascript'. Found 4 code locations containing the same block of code consisting of 13 tokens across 4 lines.",
        primaryLocationIndex: 0,
        codeLocations: [
            { file: jsFile1, startLine: 1, startColumn: 15, endLine: 4, endColumn: 2 },
            { file: jsFile1, startLine: 6, startColumn: 10, endLine: 9, endColumn: 4 },
            { file: jsFile2, startLine: 1, startColumn: 15, endLine: 4, endColumn: 2 },
            { file: jsFile2, startLine: 6, startColumn: 10, endLine: 9, endColumn: 4 }
        ]
    };

    expect(results.violations).toHaveLength(2);
    expect(results.violations).toContainEqual(expViolation1);
    expect(results.violations).toContainEqual(expViolation2);
});
300+
// Enables skip_duplicate_files and runs only the HTML copy/paste rule over the sample
// workspace; with that option on, no violations are expected to be reported.
it('When skipping duplicate files, then results should not include duplicates from files of same name and length', async () => {
    const engine: CpdEngine = new CpdEngine({
        ...DEFAULT_CPD_ENGINE_CONFIG,
        skip_duplicate_files: true
    });

    const workspace: Workspace = new Workspace([path.join(TEST_DATA_FOLDER, 'sampleCpdWorkspace')]);
    const ruleNames: string[] = ['DetectCopyPasteForHtml'];

    const results: EngineRunResults = await engine.runRules(ruleNames, { workspace });

    expect(results.violations).toHaveLength(0); // Should not pick up the someReplicatedFileWithOver100Tokens.html files
});
221316} ) ;
0 commit comments