Skip to content

Commit 6e52bbd

Browse files
committed
refactor(linter/plugins): move location-related code into separate file (#14350)
Pure refactor. Move code related to dividing source text into lines and converting offset <--> `Location` into a separate file.
1 parent 13f1003 commit 6e52bbd

File tree

3 files changed

+151
-138
lines changed

apps/oxlint/src-js/plugins/context.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { getFixes } from './fix.js';
2-
import { getIndexFromLoc, SOURCE_CODE } from './source_code.js';
2+
import { getOffsetFromLineColumn } from './location.js';
3+
import { SOURCE_CODE } from './source_code.js';
34

45
import type { Fix, FixFn } from './fix.ts';
56
import type { SourceCode } from './source_code.ts';
@@ -148,8 +149,8 @@ export class Context {
148149
if (hasOwn(diagnostic, 'loc') && (loc = (diagnostic as DiagnosticWithLoc).loc) != null) {
149150
// `loc`
150151
if (typeof loc !== 'object') throw new TypeError('`loc` must be an object');
151-
start = getIndexFromLoc(loc.start);
152-
end = getIndexFromLoc(loc.end);
152+
start = getOffsetFromLineColumn(loc.start);
153+
end = getOffsetFromLineColumn(loc.end);
153154
} else {
154155
// `node`
155156
const { node } = diagnostic as DiagnosticWithNode;
apps/oxlint/src-js/plugins/location.ts

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import { initSourceText, sourceText } from './source_code.js';
2+
3+
import type { LineColumn } from './types.ts';
4+
5+
// Pattern for splitting source text into lines
6+
const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu;
7+
8+
// Lazily populated when `SOURCE_CODE.lines` is accessed.
9+
// `lineStartOffsets` starts as `[0]`, and `resetLines` doesn't remove that initial element, so it's never empty.
10+
export const lines: string[] = [];
11+
const lineStartOffsets: number[] = [0];
12+
13+
/**
14+
* Split source text into lines.
15+
*/
16+
export function initLines(): void {
17+
if (sourceText === null) initSourceText();
18+
19+
// This implementation is based on the one in ESLint.
20+
// TODO: Investigate if using `String.prototype.matchAll` is faster.
21+
// This comment is above ESLint's implementation:
22+
/*
23+
* Previously, this was implemented using a regex that
24+
* matched a sequence of non-linebreak characters followed by a
25+
* linebreak, then adding the lengths of the matches. However,
26+
* this caused a catastrophic backtracking issue when the end
27+
* of a file contained a large number of non-newline characters.
28+
* To avoid this, the current implementation just matches newlines
29+
* and uses match.index to get the correct line start indices.
30+
*/
31+
32+
// Note: `lineStartOffsets` starts as `[0]`
33+
let lastOffset = 0, offset, match;
34+
while ((match = LINE_BREAK_PATTERN.exec(sourceText))) {
35+
offset = match.index;
36+
lines.push(sourceText.slice(lastOffset, offset));
37+
lineStartOffsets.push(lastOffset = offset + match[0].length);
38+
}
39+
lines.push(sourceText.slice(lastOffset));
40+
}
41+
42+
/**
43+
* Reset lines after file has been linted, to free memory.
44+
*/
45+
export function resetLines(): void {
46+
lines.length = 0;
47+
// Leave first entry (0) in place, discard the rest
48+
lineStartOffsets.length = 1;
49+
}
50+
51+
/**
52+
* Convert a source text index into a (line, column) pair.
53+
* @param offset - The index of a character in a file.
54+
* @returns `{line, column}` location object with 1-indexed line and 0-indexed column.
55+
* @throws {TypeError|RangeError} If non-numeric `offset`, or `offset` out of range.
56+
*/
57+
export function getLineColumnFromOffset(offset: number): LineColumn {
58+
if (typeof offset !== 'number' || offset < 0 || (offset | 0) !== offset) {
59+
throw new TypeError('Expected `offset` to be a non-negative integer.');
60+
}
61+
62+
// Build `lines` and `lineStartOffsets` tables if they haven't been already.
63+
// This also decodes `sourceText` if it wasn't already.
64+
if (lines.length === 0) initLines();
65+
66+
if (offset > sourceText.length) {
67+
throw new RangeError(
68+
`Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`,
69+
);
70+
}
71+
72+
// Binary search `lineStartOffsets` for the line containing `offset`
73+
let low = 0, high = lineStartOffsets.length, mid: number;
74+
do {
75+
mid = ((low + high) / 2) | 0; // Use bitwise OR to floor the division
76+
if (offset < lineStartOffsets[mid]) {
77+
high = mid;
78+
} else {
79+
low = mid + 1;
80+
}
81+
} while (low < high);
82+
83+
return { line: low, column: offset - lineStartOffsets[low - 1] };
84+
}
85+
86+
/**
87+
* Convert a `{ line, column }` pair into a range index.
88+
* @param loc - A line/column location.
89+
* @returns The character index of the location in the file.
90+
* @throws {TypeError|RangeError} If `loc` is not an object with a numeric `line` and `column`,
91+
* or if the `line` is less than or equal to zero, or the line or column is out of the expected range.
92+
*/
93+
export function getOffsetFromLineColumn(loc: LineColumn): number {
94+
if (loc !== null && typeof loc === 'object') {
95+
const { line, column } = loc;
96+
if (typeof line === 'number' && typeof column === 'number' && (line | 0) === line && (column | 0) === column) {
97+
// Build `lines` and `lineStartOffsets` tables if they haven't been already.
98+
// This also decodes `sourceText` if it wasn't already.
99+
if (lines.length === 0) initLines();
100+
101+
const linesCount = lineStartOffsets.length;
102+
if (line <= 0 || line > linesCount) {
103+
throw new RangeError(
104+
`Line number out of range (line ${line} requested). ` +
105+
`Line numbers should be 1-based, and less than or equal to number of lines in file (${linesCount}).`,
106+
);
107+
}
108+
if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`);
109+
110+
const lineOffset = lineStartOffsets[line - 1];
111+
const offset = lineOffset + column;
112+
113+
// Comment from ESLint implementation:
114+
/*
115+
* By design, `getIndexFromLoc({ line: lineNum, column: 0 })` should return the start index of
116+
* the given line, provided that the line number is valid element of `lines`. Since the
117+
* last element of `lines` is an empty string for files with trailing newlines, add a
118+
* special case where getting the index for the first location after the end of the file
119+
* will return the length of the file, rather than throwing an error. This allows rules to
120+
* use `getIndexFromLoc` consistently without worrying about edge cases at the end of a file.
121+
*/
122+
123+
let nextLineOffset;
124+
if (line === linesCount) {
125+
nextLineOffset = sourceText.length;
126+
if (offset <= nextLineOffset) return offset;
127+
} else {
128+
nextLineOffset = lineStartOffsets[line];
129+
if (offset < nextLineOffset) return offset;
130+
}
131+
132+
throw new RangeError(
133+
`Column number out of range (column ${column} requested, ` +
134+
`but the length of line ${line} is ${nextLineOffset - lineOffset}).`,
135+
);
136+
}
137+
}
138+
139+
throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
140+
}

apps/oxlint/src-js/plugins/source_code.ts

Lines changed: 7 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,16 @@ import {
88
// @ts-expect-error we need to generate `.d.ts` file for this module
99
// We use the deserializer which removes `ParenthesizedExpression`s from AST to match ESLint
1010
import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts_range_parent_no_parens.js';
11+
import { getLineColumnFromOffset, getOffsetFromLineColumn, initLines, lines, resetLines } from './location.js';
1112

1213
import type { Program } from '@oxc-project/types';
1314
import type { Scope, ScopeManager, Variable } from './scope.ts';
14-
import type { BufferWithArrays, Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts';
15+
import type { BufferWithArrays, Comment, Node, NodeOrToken, Token } from './types.ts';
1516

1617
const require = createRequire(import.meta.url);
1718

1819
const { max } = Math;
1920

20-
// Pattern for splitting source text into lines
21-
const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu;
22-
2321
// Text decoder, for decoding source text from buffer
2422
const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
2523

@@ -31,15 +29,10 @@ let hasBOM = false;
3129

3230
// Lazily populated when `SOURCE_CODE.text` or `SOURCE_CODE.ast` is accessed,
3331
// or `initAst()` is called before the AST is walked.
34-
let sourceText: string | null = null;
32+
export let sourceText: string | null = null;
3533
let sourceByteLen: number = 0;
3634
export let ast: Program | null = null;
3735

38-
// Lazily populated when `SOURCE_CODE.lines` is accessed.
39-
// `lineStartOffsets` starts as `[0]`, and `resetSource` doesn't remove that initial element, so it's never empty.
40-
const lines: string[] = [],
41-
lineStartOffsets: number[] = [0];
42-
4336
// Lazily populated when `SOURCE_CODE.visitorKeys` is accessed.
4437
let visitorKeys: { [key: string]: string[] } | null = null;
4538

@@ -56,7 +49,7 @@ export function setupSourceForFile(bufferInput: BufferWithArrays, hasBOMInput: b
5649
/**
5750
* Decode source text from buffer.
5851
*/
59-
function initSourceText(): void {
52+
export function initSourceText(): void {
6053
const { uint32 } = buffer,
6154
programPos = uint32[DATA_POINTER_POS_32];
6255
sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
@@ -71,35 +64,6 @@ export function initAst(): void {
7164
ast = deserializeProgramOnly(buffer, sourceText, sourceByteLen);
7265
}
7366

74-
/**
75-
* Split source text into lines.
76-
*/
77-
function initLines(): void {
78-
if (sourceText === null) initSourceText();
79-
80-
// This implementation is based on the one in ESLint.
81-
// TODO: Investigate if using `String.prototype.matchAll` is faster.
82-
// This comment is above ESLint's implementation:
83-
/*
84-
* Previously, this was implemented using a regex that
85-
* matched a sequence of non-linebreak characters followed by a
86-
* linebreak, then adding the lengths of the matches. However,
87-
* this caused a catastrophic backtracking issue when the end
88-
* of a file contained a large number of non-newline characters.
89-
* To avoid this, the current implementation just matches newlines
90-
* and uses match.index to get the correct line start indices.
91-
*/
92-
93-
// Note: `lineStartOffsets` starts as `[0]`
94-
let lastOffset = 0, offset, match;
95-
while ((match = LINE_BREAK_PATTERN.exec(sourceText))) {
96-
offset = match.index;
97-
lines.push(sourceText.slice(lastOffset, offset));
98-
lineStartOffsets.push(lastOffset = offset + match[0].length);
99-
}
100-
lines.push(sourceText.slice(lastOffset));
101-
}
102-
10367
/**
10468
* Reset source after file has been linted, to free memory.
10569
*
@@ -114,8 +78,7 @@ export function resetSource(): void {
11478
buffer = null;
11579
sourceText = null;
11680
ast = null;
117-
lines.length = 0;
118-
lineStartOffsets.length = 1;
81+
resetLines();
11982
}
12083

12184
// `SourceCode` object.
@@ -495,8 +458,8 @@ export const SOURCE_CODE = Object.freeze({
495458
throw new Error('`sourceCode.getNodeByRangeIndex` not implemented yet'); // TODO
496459
},
497460

498-
getLocFromIndex,
499-
getIndexFromLoc,
461+
getLocFromIndex: getLineColumnFromOffset,
462+
getIndexFromLoc: getOffsetFromLineColumn,
500463

501464
/**
502465
* Check whether any comments exist or not between the given 2 nodes.
@@ -546,97 +509,6 @@ export const SOURCE_CODE = Object.freeze({
546509

547510
export type SourceCode = typeof SOURCE_CODE;
548511

549-
/**
550-
* Convert a source text index into a (line, column) pair.
551-
* @param offset The index of a character in a file.
552-
* @returns `{line, column}` location object with 1-indexed line and 0-indexed column.
553-
* @throws {TypeError|RangeError} If non-numeric `index`, or `index` out of range.
554-
*/
555-
function getLocFromIndex(offset: number): LineColumn {
556-
if (typeof offset !== 'number' || offset < 0 || (offset | 0) !== offset) {
557-
throw new TypeError('Expected `offset` to be a non-negative integer.');
558-
}
559-
560-
// Build `lines` and `lineStartOffsets` tables if they haven't been already.
561-
// This also decodes `sourceText` if it wasn't already.
562-
if (lines.length === 0) initLines();
563-
564-
if (offset > sourceText.length) {
565-
throw new RangeError(
566-
`Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`,
567-
);
568-
}
569-
570-
// Binary search `lineStartOffsets` for the line containing `offset`
571-
let low = 0, high = lineStartOffsets.length, mid: number;
572-
do {
573-
mid = ((low + high) / 2) | 0; // Use bitwise OR to floor the division
574-
if (offset < lineStartOffsets[mid]) {
575-
high = mid;
576-
} else {
577-
low = mid + 1;
578-
}
579-
} while (low < high);
580-
581-
return { line: low, column: offset - lineStartOffsets[low - 1] };
582-
}
583-
584-
/**
585-
* Convert a `{ line, column }` pair into a range index.
586-
* @param loc - A line/column location.
587-
* @returns The range index of the location in the file.
588-
* @throws {TypeError|RangeError} If `loc` is not an object with a numeric `line` and `column`,
589-
* or if the `line` is less than or equal to zero, or the line or column is out of the expected range.
590-
*/
591-
export function getIndexFromLoc(loc: LineColumn): number {
592-
if (loc !== null && typeof loc === 'object') {
593-
const { line, column } = loc;
594-
if (typeof line === 'number' && typeof column === 'number' && (line | 0) === line && (column | 0) === column) {
595-
// Build `lines` and `lineStartOffsets` tables if they haven't been already.
596-
// This also decodes `sourceText` if it wasn't already.
597-
if (lines.length === 0) initLines();
598-
599-
const linesCount = lineStartOffsets.length;
600-
if (line <= 0 || line > linesCount) {
601-
throw new RangeError(
602-
`Line number out of range (line ${line} requested). ` +
603-
`Line numbers should be 1-based, and less than or equal to number of lines in file (${linesCount}).`,
604-
);
605-
}
606-
if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`);
607-
608-
const lineOffset = lineStartOffsets[line - 1];
609-
const offset = lineOffset + column;
610-
611-
// Comment from ESLint implementation:
612-
/*
613-
* By design, `getIndexFromLoc({ line: lineNum, column: 0 })` should return the start index of
614-
* the given line, provided that the line number is valid element of `lines`. Since the
615-
* last element of `lines` is an empty string for files with trailing newlines, add a
616-
* special case where getting the index for the first location after the end of the file
617-
* will return the length of the file, rather than throwing an error. This allows rules to
618-
* use `getIndexFromLoc` consistently without worrying about edge cases at the end of a file.
619-
*/
620-
621-
let nextLineOffset;
622-
if (line === linesCount) {
623-
nextLineOffset = sourceText.length;
624-
if (offset <= nextLineOffset) return offset;
625-
} else {
626-
nextLineOffset = lineStartOffsets[line];
627-
if (offset < nextLineOffset) return offset;
628-
}
629-
630-
throw new RangeError(
631-
`Column number out of range (column ${column} requested, ` +
632-
`but the length of line ${line} is ${nextLineOffset - lineOffset}).`,
633-
);
634-
}
635-
}
636-
637-
throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
638-
}
639-
640512
/**
641513
* Get all the ancestors of a given node.
642514
* @param node - AST node

0 commit comments

Comments
 (0)