From 96f4c5b373e3aca106d65b3b9d99408c32550b68 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Fri, 28 Nov 2025 11:55:46 +0000 Subject: [PATCH] refactor(linter/plugins): add parse function --- Cargo.lock | 3 + apps/oxlint/Cargo.toml | 3 + apps/oxlint/src-js/bindings.d.ts | 39 +++++ apps/oxlint/src-js/bindings.js | 5 +- apps/oxlint/src-js/package/raw_transfer.ts | 148 ++++++++++++++++++ apps/oxlint/src-js/plugins/lint.ts | 2 +- apps/oxlint/src/js_plugins/mod.rs | 3 + apps/oxlint/src/js_plugins/parse.rs | 168 +++++++++++++++++++++ apps/oxlint/src/run.rs | 9 ++ crates/oxc_linter/src/lib.rs | 4 +- 10 files changed, 380 insertions(+), 4 deletions(-) create mode 100644 apps/oxlint/src-js/package/raw_transfer.ts create mode 100644 apps/oxlint/src/js_plugins/parse.rs diff --git a/Cargo.lock b/Cargo.lock index f0b425f5852e5..d14466aa527e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2568,9 +2568,12 @@ dependencies = [ "napi-derive", "oxc-miette", "oxc_allocator", + "oxc_ast_visit", "oxc_diagnostics", "oxc_language_server", "oxc_linter", + "oxc_parser", + "oxc_semantic", "oxc_span", "rayon", "rustc-hash", diff --git a/apps/oxlint/Cargo.toml b/apps/oxlint/Cargo.toml index 3e3c7ce7a9fcb..32039e0aa33ab 100644 --- a/apps/oxlint/Cargo.toml +++ b/apps/oxlint/Cargo.toml @@ -28,9 +28,12 @@ doctest = false [dependencies] oxc_allocator = { workspace = true, features = ["fixed_size"] } +oxc_ast_visit = { workspace = true, features = ["serialize"] } oxc_diagnostics = { workspace = true } oxc_language_server = { workspace = true, features = ["linter"] } oxc_linter = { workspace = true } +oxc_parser = { workspace = true } +oxc_semantic = { workspace = true } oxc_span = { workspace = true } bpaf = { workspace = true, features = ["autocomplete", "bright-color", "derive"] } diff --git a/apps/oxlint/src-js/bindings.d.ts b/apps/oxlint/src-js/bindings.d.ts index 93921c1f0a557..f0a161bcdd837 100644 --- a/apps/oxlint/src-js/bindings.d.ts +++ b/apps/oxlint/src-js/bindings.d.ts @@ -1,5 +1,13 @@ 
/* auto-generated by NAPI-RS */ /* eslint-disable */ +/** + * Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`. + * + * Does not check that the offset is within bounds of `buffer`. + * To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes. + */ +export declare function getBufferOffset(buffer: Uint8Array): number + /** JS callback to lint a file. */ export type JsLintFileCb = ((arg0: string, arg1: number, arg2: Uint8Array | undefined | null, arg3: Array, arg4: string) => string) @@ -19,3 +27,34 @@ export type JsLoadPluginCb = * Returns `true` if linting succeeded without errors, `false` otherwise. */ export declare function lint(args: Array, loadPlugin: JsLoadPluginCb, lintFile: JsLintFileCb): Promise + +/** + * Parse AST into provided `Uint8Array` buffer, synchronously. + * + * Source text must be written into the start of the buffer, and its length (in UTF-8 bytes) + * provided as `source_len`. + * + * This function will parse the source, and write the AST into the buffer, starting at the end. + * + * It also writes to the very end of the buffer the offset of `Program` within the buffer. + * + * Caller can deserialize data from the buffer on JS side. + * + * # SAFETY + * + * Caller must ensure: + * * Source text is written into start of the buffer. + * * Source text's UTF-8 byte length is `source_len`. + * * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string. + * + * If source text is originally a JS string on JS side, and converted to a buffer with + * `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8. + * + * # Panics + * + * Panics if source text is too long, or AST takes more memory than is available in the buffer. + */ +export declare function parseRawSync(filename: string, buffer: Uint8Array, sourceLen: number): void + +/** Returns `true` if raw transfer is supported on this platform. 
*/ +export declare function rawTransferSupported(): boolean diff --git a/apps/oxlint/src-js/bindings.js b/apps/oxlint/src-js/bindings.js index fc343d7db1af6..2d0231c968c4d 100644 --- a/apps/oxlint/src-js/bindings.js +++ b/apps/oxlint/src-js/bindings.js @@ -575,5 +575,8 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { lint } = nativeBinding +const { getBufferOffset, lint, parseRawSync, rawTransferSupported } = nativeBinding +export { getBufferOffset } export { lint } +export { parseRawSync } +export { rawTransferSupported } diff --git a/apps/oxlint/src-js/package/raw_transfer.ts b/apps/oxlint/src-js/package/raw_transfer.ts new file mode 100644 index 0000000000000..0c5a7d536cbbf --- /dev/null +++ b/apps/oxlint/src-js/package/raw_transfer.ts @@ -0,0 +1,148 @@ +import { + getBufferOffset, + rawTransferSupported as rawTransferSupportedBinding, + parseRawSync, +} from "../bindings.js"; +import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.js"; +import { buffers } from "../plugins/lint.js"; +import { BUFFER_SIZE, BUFFER_ALIGN, DATA_POINTER_POS_32 } from "../generated/constants.js"; + +import type { BufferWithArrays } from "../plugins/types.js"; + +// Size of array buffer for raw transfer +const ARRAY_BUFFER_SIZE = BUFFER_SIZE + BUFFER_ALIGN; + +// 1 GiB +const ONE_GIB = 1 << 30; + +// Text encoder for encoding source text into buffer +const textEncoder = new TextEncoder(); + +// Buffer for raw transfer +let buffer: BufferWithArrays | null = null; + +// Whether raw transfer is supported +let rawTransferIsSupported: boolean | null = null; + +/** + * Parse source text into buffer. 
+ * @param path - Path of file to parse + * @param sourceText - Source text to parse + * @throws {Error} If raw transfer is not supported on this platform, or parsing failed + */ +export function parse(path: string, sourceText: string) { + if (!rawTransferSupported()) { + throw new Error( + "`RuleTester` is not supported on 32-bit or big-endian systems, versions of NodeJS prior to v22.0.0, " + + "versions of Deno prior to v2.0.0, or other runtimes", + ); + } + + // Initialize buffer, if not already + if (buffer === null) initBuffer(); + debugAssertIsNonNull(buffer); + + // Write source into start of buffer. + // `TextEncoder` cannot write into a `Uint8Array` larger than 1 GiB, + // so create a view into buffer of this size to write into. + const sourceBuffer = new Uint8Array(buffer.buffer, buffer.byteOffset, ONE_GIB); + const { read, written: sourceByteLen } = textEncoder.encodeInto(sourceText, sourceBuffer); + if (read !== sourceText.length) throw new Error("Failed to write source text into buffer"); + + // Parse into buffer + parseRawSync(path, buffer, sourceByteLen); + + // Check parsing succeeded. + // 0 is used as sentinel value to indicate parsing failed. + // TODO: Get parsing error details from Rust to display nicely. + const programOffset = buffer.uint32[DATA_POINTER_POS_32]; + if (programOffset === 0) throw new Error("Parsing failed"); +} + +/** + * Create a `Uint8Array` which is 2 GiB in size, with its start aligned on 4 GiB. + * + * Store it in `buffer`, and also in `buffers` array, so it's accessible to `lintFileImpl` by passing `0` as `bufferId`. + * + * Achieve this by creating a 6 GiB `ArrayBuffer`, getting the offset within it that's aligned to 4 GiB, + * chopping off that number of bytes from the start, and shortening to 2 GiB. + * + * It's always possible to obtain a 2 GiB slice aligned on 4 GiB within a 6 GiB buffer, + * no matter how the 6 GiB buffer is aligned. 
+ * + * Note: On systems with virtual memory, this only consumes 6 GiB of *virtual* memory. + * It does not consume physical memory until data is actually written to the `Uint8Array`. + * Physical memory consumed corresponds to the quantity of data actually written. + */ +export function initBuffer() { + // Create buffer + const arrayBuffer = new ArrayBuffer(ARRAY_BUFFER_SIZE); + const offset = getBufferOffset(new Uint8Array(arrayBuffer)); + buffer = new Uint8Array(arrayBuffer, offset, BUFFER_SIZE) as BufferWithArrays; + buffer.uint32 = new Uint32Array(arrayBuffer, offset, BUFFER_SIZE / 4); + buffer.float64 = new Float64Array(arrayBuffer, offset, BUFFER_SIZE / 8); + + // Store in `buffers`, at index 0 + debugAssert(buffers.length === 0); + buffers.push(buffer); +} + +/** + * Returns `true` if raw transfer is supported. + * + * Raw transfer is only supported on 64-bit little-endian systems, + * and NodeJS >= v22.0.0 or Deno >= v2.0.0. + * + * Versions of NodeJS prior to v22.0.0 do not support creating an `ArrayBuffer` larger than 4 GiB. + * Bun (as at v1.2.4) also does not support creating an `ArrayBuffer` larger than 4 GiB. + * Support on Deno v1 is unknown and it's EOL, so treating Deno before v2.0.0 as unsupported. + * + * No easy way to determine pointer width (64 bit or 32 bit) in JS, + * so call a function on Rust side to find out. 
+ * + * @returns {boolean} - `true` if raw transfer is supported on this platform + */ +function rawTransferSupported() { + if (rawTransferIsSupported === null) { + rawTransferIsSupported = rawTransferRuntimeSupported() && rawTransferSupportedBinding(); + } + return rawTransferIsSupported; +} + +declare global { + var Bun: unknown; + var Deno: + | { + version: { + deno: string; + }; + } + | undefined; +} + +// Checks copied from: +// https://github.com/unjs/std-env/blob/ab15595debec9e9115a9c1d31bc7597a8e71dbfd/src/runtimes.ts +// MIT license: https://github.com/unjs/std-env/blob/ab15595debec9e9115a9c1d31bc7597a8e71dbfd/LICENCE +function rawTransferRuntimeSupported() { + let global; + try { + global = globalThis; + } catch { + return false; + } + + const isBun = !!global.Bun || !!global.process?.versions?.bun; + if (isBun) return false; + + const isDeno = !!global.Deno; + if (isDeno) { + const match = Deno!.version?.deno?.match(/^(\d+)\./); + return !!match && +match[1] >= 2; + } + + const isNode = global.process?.release?.name === "node"; + if (!isNode) return false; + + const match = process.version?.match(/^v(\d+)\./); + return !!match && +match[1] >= 22; +} diff --git a/apps/oxlint/src-js/plugins/lint.ts b/apps/oxlint/src-js/plugins/lint.ts index c7990cd933427..51a87f2d27b98 100644 --- a/apps/oxlint/src-js/plugins/lint.ts +++ b/apps/oxlint/src-js/plugins/lint.ts @@ -29,7 +29,7 @@ import type { AfterHook, BufferWithArrays } from "./types.ts"; // All buffers sent from Rust are stored in this array, indexed by `bufferId` (also sent from Rust). // Buffers are only added to this array, never removed, so no buffers will be garbage collected // until the process exits. -const buffers: (BufferWithArrays | null)[] = []; +export const buffers: (BufferWithArrays | null)[] = []; // Array of `after` hooks to run after traversal. This array reused for every file. 
const afterHooks: AfterHook[] = []; diff --git a/apps/oxlint/src/js_plugins/mod.rs b/apps/oxlint/src/js_plugins/mod.rs index 819fe152c10ef..2c71dbb8c5479 100644 --- a/apps/oxlint/src/js_plugins/mod.rs +++ b/apps/oxlint/src/js_plugins/mod.rs @@ -1,5 +1,8 @@ mod external_linter; mod raw_fs; +#[cfg(all(target_pointer_width = "64", target_endian = "little"))] +pub mod parse; + pub use external_linter::create_external_linter; pub use raw_fs::RawTransferFileSystem; diff --git a/apps/oxlint/src/js_plugins/parse.rs b/apps/oxlint/src/js_plugins/parse.rs new file mode 100644 index 0000000000000..ea3c54cc51fb1 --- /dev/null +++ b/apps/oxlint/src/js_plugins/parse.rs @@ -0,0 +1,168 @@ +use std::{ + mem::ManuallyDrop, + ptr::{self, NonNull}, +}; + +use napi::bindgen_prelude::Uint8Array; +use napi_derive::napi; + +use oxc_allocator::Allocator; +use oxc_ast_visit::utf8_to_utf16::Utf8ToUtf16; +use oxc_linter::RawTransferMetadata; +use oxc_parser::{ParseOptions, Parser}; +use oxc_semantic::SemanticBuilder; +use oxc_span::SourceType; + +use crate::generated::raw_transfer_constants::{BLOCK_ALIGN as BUFFER_ALIGN, BUFFER_SIZE}; + +const BUMP_ALIGN: usize = 16; + +/// Sentinel value for program offset to indicate parsing failed. +/// +/// 0 cannot be a valid offset as it's the start of the buffer, which contains the source text. +/// Allocator bumps downwards, so if source text was empty, the program would be somewhere at end of the buffer. +const PARSE_FAIL_SENTINEL: u32 = 0; + +/// Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`. +/// +/// Does not check that the offset is within bounds of `buffer`. +/// To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes. 
+#[napi] +#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)] +pub fn get_buffer_offset(buffer: Uint8Array) -> u32 { + let buffer = &*buffer; + let offset = (BUFFER_ALIGN - (buffer.as_ptr() as usize % BUFFER_ALIGN)) % BUFFER_ALIGN; + #[expect(clippy::cast_possible_truncation)] + return offset as u32; +} + +/// Parse AST into provided `Uint8Array` buffer, synchronously. +/// +/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes) +/// provided as `source_len`. +/// +/// This function will parse the source, and write the AST into the buffer, starting at the end. +/// +/// It also writes to the very end of the buffer the offset of `Program` within the buffer. +/// +/// Caller can deserialize data from the buffer on JS side. +/// +/// # SAFETY +/// +/// Caller must ensure: +/// * Source text is written into start of the buffer. +/// * Source text's UTF-8 byte length is `source_len`. +/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string. +/// +/// If source text is originally a JS string on JS side, and converted to a buffer with +/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8. +/// +/// # Panics +/// +/// Panics if source text is too long, or AST takes more memory than is available in the buffer. +#[napi] +#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)] +pub unsafe fn parse_raw_sync(filename: String, mut buffer: Uint8Array, source_len: u32) { + // SAFETY: This function is called synchronously, so buffer cannot be mutated outside this function + // during the time this `&mut [u8]` exists + let buffer = unsafe { buffer.as_mut() }; + + // SAFETY: `parse_raw_impl` has same safety requirements as this function + unsafe { parse_raw_impl(&filename, buffer, source_len) }; +} + +/// Parse AST into buffer. +/// +/// # SAFETY +/// +/// Caller must ensure: +/// * Source text is written into start of the buffer. 
+/// * Source text's UTF-8 byte length is `source_len`. +/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string. +/// +/// If source text is originally a JS string on JS side, and converted to a buffer with +/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8. +#[allow(clippy::items_after_statements, clippy::allow_attributes)] +unsafe fn parse_raw_impl(filename: &str, buffer: &mut [u8], source_len: u32) { + // Check buffer has expected size and alignment + assert_eq!(buffer.len(), BUFFER_SIZE); + let buffer_ptr = ptr::from_mut(buffer).cast::(); + assert!((buffer_ptr as usize).is_multiple_of(BUFFER_ALIGN)); + + // Get offsets and size of data region to be managed by arena allocator. + // Leave space for source before it, and space for metadata after it. + // Metadata actually only takes 5 bytes, but round everything up to multiple of 16, + // as `bumpalo` requires that alignment. + const RAW_METADATA_SIZE: usize = size_of::(); + const { + assert!(RAW_METADATA_SIZE >= BUMP_ALIGN); + assert!(RAW_METADATA_SIZE.is_multiple_of(BUMP_ALIGN)); + }; + let source_len = source_len as usize; + let data_offset = source_len.next_multiple_of(BUMP_ALIGN); + let data_size = (BUFFER_SIZE - RAW_METADATA_SIZE).saturating_sub(data_offset); + assert!(data_size >= Allocator::RAW_MIN_SIZE, "Source text is too long"); + + // Create `Allocator`. + // Wrap in `ManuallyDrop` so the allocation doesn't get freed at end of function, or if panic. + // SAFETY: `data_offset` is less than `buffer.len()`, so `.add(data_offset)` cannot wrap + // or be out of bounds. + let data_ptr = unsafe { buffer_ptr.add(data_offset) }; + debug_assert!((data_ptr as usize).is_multiple_of(BUMP_ALIGN)); + debug_assert!(data_size.is_multiple_of(BUMP_ALIGN)); + // SAFETY: `data_ptr` and `data_size` outline a section of the memory in `buffer`. + // `data_ptr` and `data_size` are multiples of 16. + // `data_size` is at least `Allocator::RAW_MIN_SIZE` (asserted above). 
+ let allocator = + unsafe { Allocator::from_raw_parts(NonNull::new_unchecked(data_ptr), data_size) }; + let allocator = ManuallyDrop::new(allocator); + + // Parse source. + // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator` exist after this. + let source_type = SourceType::from_path(filename).unwrap_or_default(); + + let program_offset = { + // SAFETY: We checked above that `source_len` does not exceed length of buffer + let source_text = unsafe { buffer.get_unchecked(..source_len) }; + // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8 + let source_text = unsafe { str::from_utf8_unchecked(source_text) }; + + // Parse with same options as linter + let parser_ret = Parser::new(&allocator, source_text, source_type) + .with_options(ParseOptions { + parse_regular_expression: true, + allow_return_outside_function: true, + ..ParseOptions::default() + }) + .parse(); + let program = allocator.alloc(parser_ret.program); + + // Check for semantic errors + let semantic_ret = SemanticBuilder::new().with_check_syntax_error(true).build(program); + + if !parser_ret.errors.is_empty() || !semantic_ret.errors.is_empty() { + // Parsing failed. Return sentinel value to indicate this. + PARSE_FAIL_SENTINEL + } else { + // Convert spans to UTF-16 + let span_converter = Utf8ToUtf16::new(source_text); + span_converter.convert_program(program); + span_converter.convert_comments(&mut program.comments); + + // Return offset of `Program` within buffer (bottom 32 bits of pointer) + ptr::from_ref(program) as u32 + } + }; + + // Write metadata into end of buffer + #[allow(clippy::cast_possible_truncation)] + let metadata = RawTransferMetadata::new(program_offset); + const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE; + const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN)); + // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`. + // `RAW_METADATA_OFFSET` is aligned on 16. 
+ #[expect(clippy::cast_ptr_alignment)] + unsafe { + buffer_ptr.add(RAW_METADATA_OFFSET).cast::().write(metadata); + } +} diff --git a/apps/oxlint/src/run.rs b/apps/oxlint/src/run.rs index 526c39a65cd3d..ff572e0f478d9 100644 --- a/apps/oxlint/src/run.rs +++ b/apps/oxlint/src/run.rs @@ -117,3 +117,12 @@ async fn lint_impl( CliRunner::new(command, external_linter).run(&mut stdout) } + +#[cfg(all(target_pointer_width = "64", target_endian = "little"))] +pub use crate::js_plugins::parse::{get_buffer_offset, parse_raw_sync}; + +/// Returns `true` if raw transfer is supported on this platform. +#[napi] +pub fn raw_transfer_supported() -> bool { + cfg!(all(target_pointer_width = "64", target_endian = "little")) +} diff --git a/crates/oxc_linter/src/lib.rs b/crates/oxc_linter/src/lib.rs index fe1900dd6068e..31be8b8f56310 100644 --- a/crates/oxc_linter/src/lib.rs +++ b/crates/oxc_linter/src/lib.rs @@ -554,7 +554,7 @@ impl Linter { /// Any changes made here also need to be made there. /// `oxc_ast_tools` checks that the 2 copies are identical. #[ast] -struct RawTransferMetadata2 { +pub struct RawTransferMetadata2 { /// Offset of `Program` within buffer. /// Note: In `RawTransferMetadata` (in `napi/parser`), this field is offset of `RawTransferData`, /// but here it's offset of `Program`. @@ -565,7 +565,7 @@ struct RawTransferMetadata2 { pub(crate) _padding: u64, } -use RawTransferMetadata2 as RawTransferMetadata; +pub use RawTransferMetadata2 as RawTransferMetadata; impl RawTransferMetadata { pub fn new(data_offset: u32) -> Self {