diff --git a/.gitattributes b/.gitattributes index c94d36d3..a51a3543 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,4 +2,9 @@ # We do not wish to have GitHub add carriage returns for these files on windows. # So we forcefully set any file that ends in .goldfile. to only have \n characters: # See https://help.github.com/articles/dealing-with-line-endings/ -*.goldfile.* text eol=lf \ No newline at end of file +*.goldfile.* text eol=lf + +# Some of our input files must be forced to use a certain EOL signifier no matter what the OS wants. +# So we forcefully set any file that includes lf or crlf in its file extension to use the corresponding EOL. +*.lf.* text eol=lf +*.crlf.* text eol=crlf diff --git a/packages/code-analyzer-regex-engine/package.json b/packages/code-analyzer-regex-engine/package.json index d83ae356..c51dc28e 100644 --- a/packages/code-analyzer-regex-engine/package.json +++ b/packages/code-analyzer-regex-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-regex-engine", "description": "Plugin package that adds 'regex' as an engine into Salesforce Code Analyzer", - "version": "0.25.0", + "version": "0.25.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -62,4 +62,4 @@ "!src/index.ts" ] } -} \ No newline at end of file +} diff --git a/packages/code-analyzer-regex-engine/src/engine.ts b/packages/code-analyzer-regex-engine/src/engine.ts index 6bdd8476..680d311d 100644 --- a/packages/code-analyzer-regex-engine/src/engine.ts +++ b/packages/code-analyzer-regex-engine/src/engine.ts @@ -11,7 +11,6 @@ import { import path from "node:path"; import fs from "node:fs"; import * as fsp from 'node:fs/promises'; -import os from "node:os"; import {RegexRule, RegexRules} from "./config"; import {isBinaryFile} from "isbinaryfile"; import {convertToRegex, PromiseExecutionLimiter} from "./utils"; @@ -137,7 +136,8 @@ export class RegexEngine extends Engine { private async scanFileContents(fileName: string, fileContents: string, ruleName: string): Promise { const violations: Violation[] = []; const regex: RegExp = this.getRegExpFor(ruleName); - const newlineIndexes: number[] = getNewlineIndices(fileContents); + const contextuallyDerivedEol: string = contextuallyDeriveEolString(fileContents); + const newlineIndexes: number[] = getNewlineIndices(fileContents, contextuallyDerivedEol); for (const match of fileContents.matchAll(regex)) { let startIndex: number = match.index; @@ -151,9 +151,9 @@ export class RegexEngine extends Engine { } const startLine: number = getLineNumber(startIndex, newlineIndexes); - const startColumn: number = getColumnNumber(startIndex, newlineIndexes); + const startColumn: number = getColumnNumber(startIndex, newlineIndexes, contextuallyDerivedEol); const endLine: number = getLineNumber(startIndex + matchLength, newlineIndexes); - const endColumn: number = getColumnNumber(startIndex + matchLength, newlineIndexes); + const endColumn: number = getColumnNumber(startIndex + matchLength, newlineIndexes, contextuallyDerivedEol); const codeLocation: CodeLocation = { file: fileName, startLine: startLine, @@ -180,13 +180,13 @@ export class RegexEngine extends Engine { } -function getColumnNumber(charIndex: number, newlineIndexes: number[]): number { +function getColumnNumber(charIndex: number, newlineIndexes: number[], contextuallyDerivedEol: string): number { const idxOfNextNewline = newlineIndexes.findIndex(el => el >= charIndex); const idxOfCurrentLine = idxOfNextNewline === -1 ? newlineIndexes.length - 1: idxOfNextNewline - 1; if (idxOfCurrentLine === 0){ return charIndex + 1; } else { - const eolOffset = os.EOL.length - 1; + const eolOffset = contextuallyDerivedEol.length - 1; return charIndex - newlineIndexes.at(idxOfCurrentLine)! - eolOffset; } } @@ -196,8 +196,8 @@ function getLineNumber(charIndex: number, newlineIndexes: number[]): number{ return idxOfNextNewline === -1 ? newlineIndexes.length : idxOfNextNewline; } -function getNewlineIndices(fileContents: string): number[] { - const newlineRegex: RegExp = new RegExp(os.EOL, "g"); +function getNewlineIndices(fileContents: string, contextuallyDerivedEol: string): number[] { + const newlineRegex: RegExp = new RegExp(contextuallyDerivedEol, "g"); const matches = fileContents.matchAll(newlineRegex); const newlineIndexes = [-1]; @@ -207,6 +207,15 @@ function getNewlineIndices(fileContents: string): number[] { return newlineIndexes; } +/** + * We can't assume that the file's EOL indicator matches the OS's, because of edge cases such as a file created on Unix + * but scanned on Windows. So instead of consulting the OS for the EOL indicator, we look at the file and find which one it uses. + * @param contents + */ +function contextuallyDeriveEolString(contents: string): string { + return contents.indexOf('\r\n') !== -1 ? '\r\n' : '\n'; +} + async function isTextFile(fileName: string): Promise { const ext: string = path.extname(fileName).toLowerCase(); return TEXT_BASED_FILE_EXTS.has(ext) || !(await isBinaryFile(fileName)); diff --git a/packages/code-analyzer-regex-engine/test/engine.test.ts b/packages/code-analyzer-regex-engine/test/engine.test.ts index 206b6a83..f559a75a 100644 --- a/packages/code-analyzer-regex-engine/test/engine.test.ts +++ b/packages/code-analyzer-regex-engine/test/engine.test.ts @@ -39,7 +39,7 @@ const SAMPLE_CUSTOM_RULES: RegexRules = { NoTalkingAboutFightClub: { regex: '/fight club/gi', description: "The first rule of Fight Club is do not talk about Fight Club", - file_extensions: [".abc-def.xyz"], + file_extensions: [".abc-def.xyz", ".lf.cls", ".crlf.cls"], violation_message: "The second rule of Fight Club is DO NOT. TALK. ABOUT FIGHT CLUB.", severity: DEFAULT_SEVERITY_LEVEL, tags: ['PopCulture'] @@ -783,6 +783,48 @@ describe('Tests for runRules', () => { } }); + it('When files indicate EOL differently than OS, violations are still correct', async () => { + const runOptions: RunOptions = createRunOptions( + new Workspace('id', [path.resolve(__dirname, "test-data", "workspaceWithOddNewlines")])); + const runResults: EngineRunResults = await engine.runRules(['NoTalkingAboutFightClub'], runOptions); + + const expectedViolations: Violation[] = [ + { + ruleName: "NoTalkingAboutFightClub", + message: "The second rule of Fight Club is DO NOT. TALK. ABOUT FIGHT CLUB.", + primaryLocationIndex: 0, + codeLocations: [ + { + file: path.resolve(__dirname, "test-data", "workspaceWithOddNewlines", "testClass.lf.cls"), + startLine: 3, + startColumn: 87, + endLine: 3, + endColumn: 97 + } + ] + }, + { + ruleName: "NoTalkingAboutFightClub", + message: "The second rule of Fight Club is DO NOT. TALK. ABOUT FIGHT CLUB.", + primaryLocationIndex: 0, + codeLocations: [ + { + file: path.resolve(__dirname, "test-data", "workspaceWithOddNewlines", "testClass.crlf.cls"), + startLine: 3, + startColumn: 87, + endLine: 3, + endColumn: 97 + } + ] + } + ]; + + expect(runResults.violations).toHaveLength(expectedViolations.length); + for (const expectedViolation of expectedViolations) { + expect(runResults.violations).toContainEqual(expectedViolation); + } + }); + it("When running all rules compared to some rules, then output correctly returns what the correct violations according to specified rules", async () => { const runOptions: RunOptions = createRunOptions( new Workspace('id', [path.resolve(__dirname, "test-data", "sampleWorkspace")])); diff --git a/packages/code-analyzer-regex-engine/test/test-data/workspaceWithOddNewlines/testClass.crlf.cls b/packages/code-analyzer-regex-engine/test/test-data/workspaceWithOddNewlines/testClass.crlf.cls new file mode 100644 index 00000000..cb73e7e2 --- /dev/null +++ b/packages/code-analyzer-regex-engine/test/test-data/workspaceWithOddNewlines/testClass.crlf.cls @@ -0,0 +1,6 @@ +public class TestClass{ + public static boolean doTheThing() { + List strings = new List{'This is fine', 'This is also fine', 'fight club', 'OH NO, that last string was bad!'}; + return false; + } +} \ No newline at end of file diff --git a/packages/code-analyzer-regex-engine/test/test-data/workspaceWithOddNewlines/testClass.lf.cls b/packages/code-analyzer-regex-engine/test/test-data/workspaceWithOddNewlines/testClass.lf.cls new file mode 100644 index 00000000..10038a81 --- /dev/null +++ b/packages/code-analyzer-regex-engine/test/test-data/workspaceWithOddNewlines/testClass.lf.cls @@ -0,0 +1,6 @@ +public class TestClass { + public static boolean doTheThing() { + List strings = new List{'THis is fine', 'THis is also fine', 'fight club', 'OH NO, that last string was bad!'}; + return false; + } +}