refactor(linter/plugins): move location-related code into separate file (#14350)

overlookmotel · overlookmotel · commit 6e52bbd73c47 · 2025-10-05T12:40:13.000Z
Pure refactor. Move code related to dividing source text into lines and converting offset <--> `Location` into a separate file.
diff --git a/apps/oxlint/src-js/plugins/context.ts b/apps/oxlint/src-js/plugins/context.ts
@@ -1,5 +1,6 @@
 import { getFixes } from './fix.js';
-import { getIndexFromLoc, SOURCE_CODE } from './source_code.js';
+import { getOffsetFromLineColumn } from './location.js';
+import { SOURCE_CODE } from './source_code.js';
 
 import type { Fix, FixFn } from './fix.ts';
 import type { SourceCode } from './source_code.ts';
@@ -148,8 +149,8 @@ export class Context {
     if (hasOwn(diagnostic, 'loc') && (loc = (diagnostic as DiagnosticWithLoc).loc) != null) {
       // `loc`
       if (typeof loc !== 'object') throw new TypeError('`loc` must be an object');
-      start = getIndexFromLoc(loc.start);
-      end = getIndexFromLoc(loc.end);
+      start = getOffsetFromLineColumn(loc.start);
+      end = getOffsetFromLineColumn(loc.end);
     } else {
       // `node`
       const { node } = diagnostic as DiagnosticWithNode;
diff --git a/apps/oxlint/src-js/plugins/location.ts b/apps/oxlint/src-js/plugins/location.ts
@@ -0,0 +1,140 @@
+import { initSourceText, sourceText } from './source_code.js';
+
+import type { LineColumn } from './types.ts';
+
+// Pattern matching one line break (CRLF, CR, LF, U+2028 or U+2029), used for splitting source text into lines
+const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu;
+
+// Lazily populated by `initLines` (e.g. when `SOURCE_CODE.lines` is accessed).
+// `lineStartOffsets` starts as `[0]`, and `resetLines` doesn't remove that initial element, so it's never empty.
+export const lines: string[] = [];
+const lineStartOffsets: number[] = [0];
+
+/**
+ * Split source text into lines, filling `lines` and `lineStartOffsets`. Calls `initSourceText` first if `sourceText` is `null`.
+ */
+export function initLines(): void {
+  if (sourceText === null) initSourceText();
+
+  // This implementation is based on the one in ESLint.
+  // TODO: Investigate if using `String.prototype.matchAll` is faster.
+  // This comment is above ESLint's implementation:
+  /*
+   * Previously, this was implemented using a regex that
+   * matched a sequence of non-linebreak characters followed by a
+   * linebreak, then adding the lengths of the matches. However,
+   * this caused a catastrophic backtracking issue when the end
+   * of a file contained a large number of non-newline characters.
+   * To avoid this, the current implementation just matches newlines
+   * and uses match.index to get the correct line start indices.
+   */
+
+  // Note: `lineStartOffsets` starts as `[0]`, so only the start offset of each line after the first is pushed here
+  let lastOffset = 0, offset, match;
+  while ((match = LINE_BREAK_PATTERN.exec(sourceText))) {
+    offset = match.index;
+    lines.push(sourceText.slice(lastOffset, offset));
+    lineStartOffsets.push(lastOffset = offset + match[0].length);
+  }
+  lines.push(sourceText.slice(lastOffset));
+}
+
+/**
+ * Reset `lines` and `lineStartOffsets` after file has been linted, to free memory.
+ */
+export function resetLines(): void {
+  lines.length = 0;
+  // Leave first entry (0) in place, discard the rest, so `lineStartOffsets` is `[0]` again
+  lineStartOffsets.length = 1;
+}
+
+/**
+ * Convert a source text index into a (line, column) pair.
+ * @param offset - The index of a character in a file.
+ * @returns `{line, column}` location object with 1-indexed line and 0-indexed column.
+ * @throws {TypeError|RangeError} `TypeError` if `offset` is not a non-negative integer, `RangeError` if `offset` exceeds source text length.
+ */
+export function getLineColumnFromOffset(offset: number): LineColumn {
+  if (typeof offset !== 'number' || offset < 0 || (offset | 0) !== offset) {
+    throw new TypeError('Expected `offset` to be a non-negative integer.');
+  }
+
+  // Build `lines` and `lineStartOffsets` tables if they haven't been already.
+  // This also decodes `sourceText` if it wasn't already.
+  if (lines.length === 0) initLines();
+
+  if (offset > sourceText.length) {
+    throw new RangeError(
+      `Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`,
+    );
+  }
+
+  // Binary search `lineStartOffsets` for the line containing `offset`. On exit, `low` is the 1-indexed line number.
+  let low = 0, high = lineStartOffsets.length, mid: number;
+  do {
+    mid = ((low + high) / 2) | 0; // Use bitwise OR to floor the division
+    if (offset < lineStartOffsets[mid]) {
+      high = mid;
+    } else {
+      low = mid + 1;
+    }
+  } while (low < high);
+
+  return { line: low, column: offset - lineStartOffsets[low - 1] };
+}
+
+/**
+ * Convert a `{ line, column }` pair (1-indexed line, 0-indexed column) into a source text offset.
+ * @param loc - A line/column location.
+ * @returns The character index of the location in the file.
+ * @throws {TypeError|RangeError} `TypeError` if `loc` is not an object with integer `line` and `column`;
+ *   `RangeError` if the `line` is less than or equal to zero, or the line or column is out of the expected range.
+ */
+export function getOffsetFromLineColumn(loc: LineColumn): number {
+  if (loc !== null && typeof loc === 'object') {
+    const { line, column } = loc;
+    if (typeof line === 'number' && typeof column === 'number' && (line | 0) === line && (column | 0) === column) {
+      // Build `lines` and `lineStartOffsets` tables if they haven't been already.
+      // This also decodes `sourceText` if it wasn't already.
+      if (lines.length === 0) initLines();
+
+      const linesCount = lineStartOffsets.length;
+      if (line <= 0 || line > linesCount) {
+        throw new RangeError(
+          `Line number out of range (line ${line} requested). ` +
+            `Line numbers should be 1-based, and less than or equal to number of lines in file (${linesCount}).`,
+        );
+      }
+      if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`);
+
+      const lineOffset = lineStartOffsets[line - 1];
+      const offset = lineOffset + column;
+
+      // Comment from ESLint implementation:
+      /*
+       * By design, `getIndexFromLoc({ line: lineNum, column: 0 })` should return the start index of
+       * the given line, provided that the line number is valid element of `lines`. Since the
+       * last element of `lines` is an empty string for files with trailing newlines, add a
+       * special case where getting the index for the first location after the end of the file
+       * will return the length of the file, rather than throwing an error. This allows rules to
+       * use `getIndexFromLoc` consistently without worrying about edge cases at the end of a file.
+       */
+
+      let nextLineOffset;
+      if (line === linesCount) {
+        nextLineOffset = sourceText.length;
+        if (offset <= nextLineOffset) return offset;
+      } else {
+        nextLineOffset = lineStartOffsets[line];
+        if (offset < nextLineOffset) return offset;
+      }
+
+      throw new RangeError(
+        `Column number out of range (column ${column} requested, ` +
+          `but the length of line ${line} is ${nextLineOffset - lineOffset}).`,
+      );
+    }
+  }
+
+  throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
+}
diff --git a/apps/oxlint/src-js/plugins/source_code.ts b/apps/oxlint/src-js/plugins/source_code.ts
@@ -8,18 +8,16 @@ import {
 // @ts-expect-error we need to generate `.d.ts` file for this module
 // We use the deserializer which removes `ParenthesizedExpression`s from AST to match ESLint
 import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts_range_parent_no_parens.js';
+import { getLineColumnFromOffset, getOffsetFromLineColumn, initLines, lines, resetLines } from './location.js';
 
 import type { Program } from '@oxc-project/types';
 import type { Scope, ScopeManager, Variable } from './scope.ts';
-import type { BufferWithArrays, Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts';
+import type { BufferWithArrays, Comment, Node, NodeOrToken, Token } from './types.ts';
 
 const require = createRequire(import.meta.url);
 
 const { max } = Math;
 
-// Pattern for splitting source text into lines
-const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu;
-
 // Text decoder, for decoding source text from buffer
 const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
 
@@ -31,15 +29,10 @@ let hasBOM = false;
 
 // Lazily populated when `SOURCE_CODE.text` or `SOURCE_CODE.ast` is accessed,
 // or `initAst()` is called before the AST is walked.
-let sourceText: string | null = null;
+export let sourceText: string | null = null;
 let sourceByteLen: number = 0;
 export let ast: Program | null = null;
 
-// Lazily populated when `SOURCE_CODE.lines` is accessed.
-// `lineStartOffsets` starts as `[0]`, and `resetSource` doesn't remove that initial element, so it's never empty.
-const lines: string[] = [],
-  lineStartOffsets: number[] = [0];
-
 // Lazily populated when `SOURCE_CODE.visitorKeys` is accessed.
 let visitorKeys: { [key: string]: string[] } | null = null;
 
@@ -56,7 +49,7 @@ export function setupSourceForFile(bufferInput: BufferWithArrays, hasBOMInput: b
 /**
  * Decode source text from buffer.
  */
-function initSourceText(): void {
+export function initSourceText(): void {
   const { uint32 } = buffer,
     programPos = uint32[DATA_POINTER_POS_32];
   sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
@@ -71,35 +64,6 @@ export function initAst(): void {
   ast = deserializeProgramOnly(buffer, sourceText, sourceByteLen);
 }
 
-/**
- * Split source text into lines.
- */
-function initLines(): void {
-  if (sourceText === null) initSourceText();
-
-  // This implementation is based on the one in ESLint.
-  // TODO: Investigate if using `String.prototype.matchAll` is faster.
-  // This comment is above ESLint's implementation:
-  /*
-   * Previously, this was implemented using a regex that
-   * matched a sequence of non-linebreak characters followed by a
-   * linebreak, then adding the lengths of the matches. However,
-   * this caused a catastrophic backtracking issue when the end
-   * of a file contained a large number of non-newline characters.
-   * To avoid this, the current implementation just matches newlines
-   * and uses match.index to get the correct line start indices.
-   */
-
-  // Note: `lineStartOffsets` starts as `[0]`
-  let lastOffset = 0, offset, match;
-  while ((match = LINE_BREAK_PATTERN.exec(sourceText))) {
-    offset = match.index;
-    lines.push(sourceText.slice(lastOffset, offset));
-    lineStartOffsets.push(lastOffset = offset + match[0].length);
-  }
-  lines.push(sourceText.slice(lastOffset));
-}
-
 /**
  * Reset source after file has been linted, to free memory.
  *
@@ -114,8 +78,7 @@ export function resetSource(): void {
   buffer = null;
   sourceText = null;
   ast = null;
-  lines.length = 0;
-  lineStartOffsets.length = 1;
+  resetLines();
 }
 
 // `SourceCode` object.
@@ -495,8 +458,8 @@ export const SOURCE_CODE = Object.freeze({
     throw new Error('`sourceCode.getNodeByRangeIndex` not implemented yet'); // TODO
   },
 
-  getLocFromIndex,
-  getIndexFromLoc,
+  getLocFromIndex: getLineColumnFromOffset,
+  getIndexFromLoc: getOffsetFromLineColumn,
 
   /**
    * Check whether any comments exist or not between the given 2 nodes.
@@ -546,97 +509,6 @@ export const SOURCE_CODE = Object.freeze({
 
 export type SourceCode = typeof SOURCE_CODE;
 
-/**
- * Convert a source text index into a (line, column) pair.
- * @param offset The index of a character in a file.
- * @returns `{line, column}` location object with 1-indexed line and 0-indexed column.
- * @throws {TypeError|RangeError} If non-numeric `index`, or `index` out of range.
- */
-function getLocFromIndex(offset: number): LineColumn {
-  if (typeof offset !== 'number' || offset < 0 || (offset | 0) !== offset) {
-    throw new TypeError('Expected `offset` to be a non-negative integer.');
-  }
-
-  // Build `lines` and `lineStartOffsets` tables if they haven't been already.
-  // This also decodes `sourceText` if it wasn't already.
-  if (lines.length === 0) initLines();
-
-  if (offset > sourceText.length) {
-    throw new RangeError(
-      `Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`,
-    );
-  }
-
-  // Binary search `lineStartOffsets` for the line containing `offset`
-  let low = 0, high = lineStartOffsets.length, mid: number;
-  do {
-    mid = ((low + high) / 2) | 0; // Use bitwise OR to floor the division
-    if (offset < lineStartOffsets[mid]) {
-      high = mid;
-    } else {
-      low = mid + 1;
-    }
-  } while (low < high);
-
-  return { line: low, column: offset - lineStartOffsets[low - 1] };
-}
-
-/**
- * Convert a `{ line, column }` pair into a range index.
- * @param loc - A line/column location.
- * @returns The range index of the location in the file.
- * @throws {TypeError|RangeError} If `loc` is not an object with a numeric `line` and `column`,
- *   or if the `line` is less than or equal to zero, or the line or column is out of the expected range.
- */
-export function getIndexFromLoc(loc: LineColumn): number {
-  if (loc !== null && typeof loc === 'object') {
-    const { line, column } = loc;
-    if (typeof line === 'number' && typeof column === 'number' && (line | 0) === line && (column | 0) === column) {
-      // Build `lines` and `lineStartOffsets` tables if they haven't been already.
-      // This also decodes `sourceText` if it wasn't already.
-      if (lines.length === 0) initLines();
-
-      const linesCount = lineStartOffsets.length;
-      if (line <= 0 || line > linesCount) {
-        throw new RangeError(
-          `Line number out of range (line ${line} requested). ` +
-            `Line numbers should be 1-based, and less than or equal to number of lines in file (${linesCount}).`,
-        );
-      }
-      if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`);
-
-      const lineOffset = lineStartOffsets[line - 1];
-      const offset = lineOffset + column;
-
-      // Comment from ESLint implementation:
-      /*
-       * By design, `getIndexFromLoc({ line: lineNum, column: 0 })` should return the start index of
-       * the given line, provided that the line number is valid element of `lines`. Since the
-       * last element of `lines` is an empty string for files with trailing newlines, add a
-       * special case where getting the index for the first location after the end of the file
-       * will return the length of the file, rather than throwing an error. This allows rules to
-       * use `getIndexFromLoc` consistently without worrying about edge cases at the end of a file.
-       */
-
-      let nextLineOffset;
-      if (line === linesCount) {
-        nextLineOffset = sourceText.length;
-        if (offset <= nextLineOffset) return offset;
-      } else {
-        nextLineOffset = lineStartOffsets[line];
-        if (offset < nextLineOffset) return offset;
-      }
-
-      throw new RangeError(
-        `Column number out of range (column ${column} requested, ` +
-          `but the length of line ${line} is ${nextLineOffset - lineOffset}).`,
-      );
-    }
-  }
-
-  throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
-}
-
 /**
  * Get all the ancestors of a given node.
  * @param node - AST node