From 96f4c5b373e3aca106d65b3b9d99408c32550b68 Mon Sep 17 00:00:00 2001
From: overlookmotel <theoverlookmotel@gmail.com>
Date: Fri, 28 Nov 2025 11:55:46 +0000
Subject: [PATCH] refactor(linter/plugins): add parse function

---
 Cargo.lock                                 |   3 +
 apps/oxlint/Cargo.toml                     |   3 +
 apps/oxlint/src-js/bindings.d.ts           |  39 +++++
 apps/oxlint/src-js/bindings.js             |   5 +-
 apps/oxlint/src-js/package/raw_transfer.ts | 148 ++++++++++++++++++
 apps/oxlint/src-js/plugins/lint.ts         |   2 +-
 apps/oxlint/src/js_plugins/mod.rs          |   3 +
 apps/oxlint/src/js_plugins/parse.rs        | 168 +++++++++++++++++++++
 apps/oxlint/src/run.rs                     |   9 ++
 crates/oxc_linter/src/lib.rs               |   4 +-
 10 files changed, 380 insertions(+), 4 deletions(-)
 create mode 100644 apps/oxlint/src-js/package/raw_transfer.ts
 create mode 100644 apps/oxlint/src/js_plugins/parse.rs
diff --git a/Cargo.lock b/Cargo.lock
index f0b425f5852e5..d14466aa527e9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2568,9 +2568,12 @@ dependencies = [
  "napi-derive",
  "oxc-miette",
  "oxc_allocator",
+ "oxc_ast_visit",
  "oxc_diagnostics",
  "oxc_language_server",
  "oxc_linter",
+ "oxc_parser",
+ "oxc_semantic",
  "oxc_span",
  "rayon",
  "rustc-hash",
diff --git a/apps/oxlint/Cargo.toml b/apps/oxlint/Cargo.toml
index 3e3c7ce7a9fcb..32039e0aa33ab 100644
--- a/apps/oxlint/Cargo.toml
+++ b/apps/oxlint/Cargo.toml
@@ -28,9 +28,12 @@ doctest = false
 
 [dependencies]
 oxc_allocator = { workspace = true, features = ["fixed_size"] }
+oxc_ast_visit = { workspace = true, features = ["serialize"] }
 oxc_diagnostics = { workspace = true }
 oxc_language_server = { workspace = true, features = ["linter"] }
 oxc_linter = { workspace = true }
+oxc_parser = { workspace = true }
+oxc_semantic = { workspace = true }
 oxc_span = { workspace = true }
 
 bpaf = { workspace = true, features = ["autocomplete", "bright-color", "derive"] }
diff --git a/apps/oxlint/src-js/bindings.d.ts b/apps/oxlint/src-js/bindings.d.ts
index 93921c1f0a557..f0a161bcdd837 100644
--- a/apps/oxlint/src-js/bindings.d.ts
+++ b/apps/oxlint/src-js/bindings.d.ts
@@ -1,5 +1,13 @@
 /* auto-generated by NAPI-RS */
 /* eslint-disable */
+/**
+ * Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`.
+ *
+ * Does not check that the offset is within bounds of `buffer`.
+ * To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes.
+ */
+export declare function getBufferOffset(buffer: Uint8Array): number
+
 /** JS callback to lint a file. */
 export type JsLintFileCb =
   ((arg0: string, arg1: number, arg2: Uint8Array | undefined | null, arg3: Array<number>, arg4: string) => string)
@@ -19,3 +27,34 @@ export type JsLoadPluginCb =
  * Returns `true` if linting succeeded without errors, `false` otherwise.
  */
 export declare function lint(args: Array<string>, loadPlugin: JsLoadPluginCb, lintFile: JsLintFileCb): Promise<boolean>
+
+/**
+ * Parse AST into provided `Uint8Array` buffer, synchronously.
+ *
+ * Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
+ * provided as `source_len`.
+ *
+ * This function will parse the source, and write the AST into the buffer, starting at the end.
+ *
+ * It also writes to the very end of the buffer the offset of `Program` within the buffer.
+ *
+ * Caller can deserialize data from the buffer on JS side.
+ *
+ * # SAFETY
+ *
+ * Caller must ensure:
+ * * Source text is written into start of the buffer.
+ * * Source text's UTF-8 byte length is `source_len`.
+ * * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
+ *
+ * If source text is originally a JS string on JS side, and converted to a buffer with
+ * `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
+ *
+ * # Panics
+ *
+ * Panics if source text is too long, or AST takes more memory than is available in the buffer.
+ */
+export declare function parseRawSync(filename: string, buffer: Uint8Array, sourceLen: number): void
+
+/** Returns `true` if raw transfer is supported on this platform. */
+export declare function rawTransferSupported(): boolean
diff --git a/apps/oxlint/src-js/bindings.js b/apps/oxlint/src-js/bindings.js
index fc343d7db1af6..2d0231c968c4d 100644
--- a/apps/oxlint/src-js/bindings.js
+++ b/apps/oxlint/src-js/bindings.js
@@ -575,5 +575,8 @@ if (!nativeBinding) {
   throw new Error(`Failed to load native binding`)
 }
 
-const { lint } = nativeBinding
+const { getBufferOffset, lint, parseRawSync, rawTransferSupported } = nativeBinding
+export { getBufferOffset }
 export { lint }
+export { parseRawSync }
+export { rawTransferSupported }
diff --git a/apps/oxlint/src-js/package/raw_transfer.ts b/apps/oxlint/src-js/package/raw_transfer.ts
new file mode 100644
index 0000000000000..0c5a7d536cbbf
--- /dev/null
+++ b/apps/oxlint/src-js/package/raw_transfer.ts
@@ -0,0 +1,148 @@
+import {
+  getBufferOffset,
+  rawTransferSupported as rawTransferSupportedBinding,
+  parseRawSync,
+} from "../bindings.js";
+import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.js";
+import { buffers } from "../plugins/lint.js";
+import { BUFFER_SIZE, BUFFER_ALIGN, DATA_POINTER_POS_32 } from "../generated/constants.js";
+
+import type { BufferWithArrays } from "../plugins/types.js";
+
+// Size of array buffer for raw transfer
+const ARRAY_BUFFER_SIZE = BUFFER_SIZE + BUFFER_ALIGN;
+
+// 1 GiB
+const ONE_GIB = 1 << 30;
+
+// Text encoder for encoding source text into buffer
+const textEncoder = new TextEncoder();
+
+// Buffer for raw transfer
+let buffer: BufferWithArrays | null = null;
+
+// Whether raw transfer is supported
+let rawTransferIsSupported: boolean | null = null;
+
+/**
+ * Parse source text into buffer.
+ * @param path - Path of file to parse
+ * @param sourceText - Source text to parse
+ * @throws {Error} If raw transfer is not supported on this platform, source text could not be written to buffer, or parsing failed
+ */
+export function parse(path: string, sourceText: string) {
+  if (!rawTransferSupported()) {
+    throw new Error(
+      "`RuleTester` is not supported on 32-bit or big-endian systems, versions of NodeJS prior to v22.0.0, " +
+        "versions of Deno prior to v2.0.0, or other runtimes",
+    );
+  }
+
+  // Initialize buffer, if not already initialized
+  if (buffer === null) initBuffer();
+  debugAssertIsNonNull(buffer);
+
+  // Write source into start of buffer, encoded as UTF-8.
+  // `TextEncoder` cannot write into a `Uint8Array` larger than 1 GiB,
+  // so create a 1 GiB view onto the start of the buffer to write into.
+  const sourceBuffer = new Uint8Array(buffer.buffer, buffer.byteOffset, ONE_GIB);
+  const { read, written: sourceByteLen } = textEncoder.encodeInto(sourceText, sourceBuffer);
+  if (read !== sourceText.length) throw new Error("Failed to write source text into buffer");
+
+  // Parse into buffer. `sourceByteLen` is length of the source in UTF-8 bytes.
+  parseRawSync(path, buffer, sourceByteLen);
+
+  // Check parsing succeeded.
+  // Rust side writes 0 as `Program` offset as a sentinel value to indicate parsing failed.
+  // TODO: Get parsing error details from Rust to display nicely.
+  const programOffset = buffer.uint32[DATA_POINTER_POS_32];
+  if (programOffset === 0) throw new Error("Parsing failed");
+}
+
+/**
+ * Create a `Uint8Array` which is 2 GiB in size, with its start aligned on 4 GiB.
+ *
+ * Store it in `buffer`, and also in `buffers` array, so it's accessible to `lintFileImpl` by passing `0` as `bufferId`.
+ *
+ * Achieve this by creating a 6 GiB `ArrayBuffer`, getting the offset within it that's aligned to 4 GiB,
+ * chopping off that number of bytes from the start, and shortening to 2 GiB.
+ *
+ * It's always possible to obtain a 2 GiB slice aligned on 4 GiB within a 6 GiB buffer,
+ * no matter how the 6 GiB buffer is aligned.
+ *
+ * Note: On systems with virtual memory, this only consumes 6 GiB of *virtual* memory.
+ * It does not consume physical memory until data is actually written to the `Uint8Array`.
+ * Physical memory consumed corresponds to the quantity of data actually written.
+ */
+export function initBuffer() {
+  // Create over-sized `ArrayBuffer`, and take aligned `Uint8Array`, `Uint32Array` + `Float64Array` views of it
+  const arrayBuffer = new ArrayBuffer(ARRAY_BUFFER_SIZE);
+  const offset = getBufferOffset(new Uint8Array(arrayBuffer));
+  buffer = new Uint8Array(arrayBuffer, offset, BUFFER_SIZE) as BufferWithArrays;
+  buffer.uint32 = new Uint32Array(arrayBuffer, offset, BUFFER_SIZE / 4);
+  buffer.float64 = new Float64Array(arrayBuffer, offset, BUFFER_SIZE / 8);
+
+  // Store in `buffers`, at index 0 (asserted that no other buffer was stored before this one)
+  debugAssert(buffers.length === 0);
+  buffers.push(buffer);
+}
+
+/**
+ * Returns `true` if raw transfer is supported. Result is computed on first call, and cached.
+ *
+ * Raw transfer is only supported on 64-bit little-endian systems,
+ * and NodeJS >= v22.0.0 or Deno >= v2.0.0.
+ *
+ * Versions of NodeJS prior to v22.0.0 do not support creating an `ArrayBuffer` larger than 4 GiB.
+ * Bun (as at v1.2.4) also does not support creating an `ArrayBuffer` larger than 4 GiB.
+ * Support on Deno v1 is unknown and it's EOL, so treating Deno before v2.0.0 as unsupported.
+ *
+ * There is no easy way to determine pointer width (64 bit or 32 bit) in JS,
+ * so call a function on Rust side to find out.
+ *
+ * @returns {boolean} - `true` if raw transfer is supported on this platform
+ */
+function rawTransferSupported() {
+  if (rawTransferIsSupported === null) {
+    rawTransferIsSupported = rawTransferRuntimeSupported() && rawTransferSupportedBinding();
+  }
+  return rawTransferIsSupported;
+}
+
+declare global {
+  var Bun: unknown;
+  var Deno:
+    | {
+        version: {
+          deno: string;
+        };
+      }
+    | undefined;
+}
+
+// Returns `true` if JS runtime is NodeJS >= v22 or Deno >= v2 (never Bun). Checks copied from:
+// https://github.com/unjs/std-env/blob/ab15595debec9e9115a9c1d31bc7597a8e71dbfd/src/runtimes.ts
+// MIT license: https://github.com/unjs/std-env/blob/ab15595debec9e9115a9c1d31bc7597a8e71dbfd/LICENCE
+function rawTransferRuntimeSupported() {
+  let global;
+  try {
+    global = globalThis;
+  } catch {
+    return false;
+  }
+
+  const isBun = !!global.Bun || !!global.process?.versions?.bun;
+  if (isBun) return false;
+
+  const isDeno = !!global.Deno;
+  if (isDeno) {
+    const match = Deno!.version?.deno?.match(/^(\d+)\./);
+    return !!match && +match[1] >= 2;
+  }
+
+  const isNode = global.process?.release?.name === "node";
+  if (!isNode) return false;
+
+  const match = process.version?.match(/^v(\d+)\./);
+  return !!match && +match[1] >= 22;
+}
diff --git a/apps/oxlint/src-js/plugins/lint.ts b/apps/oxlint/src-js/plugins/lint.ts
index c7990cd933427..51a87f2d27b98 100644
--- a/apps/oxlint/src-js/plugins/lint.ts
+++ b/apps/oxlint/src-js/plugins/lint.ts
@@ -29,7 +29,7 @@ import type { AfterHook, BufferWithArrays } from "./types.ts";
 // All buffers sent from Rust are stored in this array, indexed by `bufferId` (also sent from Rust).
 // Buffers are only added to this array, never removed, so no buffers will be garbage collected
 // until the process exits.
-const buffers: (BufferWithArrays | null)[] = [];
+export const buffers: (BufferWithArrays | null)[] = [];
 
 // Array of `after` hooks to run after traversal. This array reused for every file.
 const afterHooks: AfterHook[] = [];
diff --git a/apps/oxlint/src/js_plugins/mod.rs b/apps/oxlint/src/js_plugins/mod.rs
index 819fe152c10ef..2c71dbb8c5479 100644
--- a/apps/oxlint/src/js_plugins/mod.rs
+++ b/apps/oxlint/src/js_plugins/mod.rs
@@ -1,5 +1,8 @@
 mod external_linter;
 mod raw_fs;
 
+#[cfg(all(target_pointer_width = "64", target_endian = "little"))]
+pub mod parse;
+
 pub use external_linter::create_external_linter;
 pub use raw_fs::RawTransferFileSystem;
diff --git a/apps/oxlint/src/js_plugins/parse.rs b/apps/oxlint/src/js_plugins/parse.rs
new file mode 100644
index 0000000000000..ea3c54cc51fb1
--- /dev/null
+++ b/apps/oxlint/src/js_plugins/parse.rs
@@ -0,0 +1,168 @@
+use std::{
+    mem::ManuallyDrop,
+    ptr::{self, NonNull},
+};
+
+use napi::bindgen_prelude::Uint8Array;
+use napi_derive::napi;
+
+use oxc_allocator::Allocator;
+use oxc_ast_visit::utf8_to_utf16::Utf8ToUtf16;
+use oxc_linter::RawTransferMetadata;
+use oxc_parser::{ParseOptions, Parser};
+use oxc_semantic::SemanticBuilder;
+use oxc_span::SourceType;
+
+use crate::generated::raw_transfer_constants::{BLOCK_ALIGN as BUFFER_ALIGN, BUFFER_SIZE};
+
+const BUMP_ALIGN: usize = 16;
+
+/// Sentinel value for program offset to indicate parsing failed.
+///
+/// 0 cannot be a valid offset as it's the start of the buffer, which contains the source text.
+/// Allocator bumps downwards, so if source text was empty, the program would be somewhere at end of the buffer.
+const PARSE_FAIL_SENTINEL: u32 = 0;
+
+/// Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`.
+///
+/// Does not check that the offset is within bounds of `buffer`.
+/// To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes.
+#[napi]
+#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)]
+pub fn get_buffer_offset(buffer: Uint8Array) -> u32 {
+    let buffer = &*buffer;
+    let offset = (BUFFER_ALIGN - (buffer.as_ptr() as usize % BUFFER_ALIGN)) % BUFFER_ALIGN;
+    #[expect(clippy::cast_possible_truncation)]
+    return offset as u32;
+}
+
+/// Parse AST into provided `Uint8Array` buffer, synchronously.
+///
+/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
+/// provided as `source_len`.
+///
+/// This function will parse the source, and write the AST into the buffer, starting at the end.
+///
+/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
+///
+/// Caller can deserialize data from the buffer on JS side.
+///
+/// # SAFETY
+///
+/// Caller must ensure:
+/// * Source text is written into start of the buffer.
+/// * Source text's UTF-8 byte length is `source_len`.
+/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
+///
+/// If source text is originally a JS string on JS side, and converted to a buffer with
+/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
+///
+/// # Panics
+///
+/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
+#[napi]
+#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)]
+pub unsafe fn parse_raw_sync(filename: String, mut buffer: Uint8Array, source_len: u32) {
+    // SAFETY: This function is called synchronously, so buffer cannot be mutated outside this function
+    // during the time this `&mut [u8]` exists
+    let buffer = unsafe { buffer.as_mut() };
+
+    // SAFETY: `parse_raw_impl` has same safety requirements as this function
+    unsafe { parse_raw_impl(&filename, buffer, source_len) };
+}
+
+/// Parse source text at start of `buffer`, writing AST and metadata into the rest of `buffer`.
+///
+/// # SAFETY
+///
+/// Caller must ensure:
+/// * Source text is written into start of the buffer.
+/// * Source text's UTF-8 byte length is `source_len`.
+/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
+///
+/// If source text is originally a JS string on JS side, and converted to a buffer with
+/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
+#[allow(clippy::items_after_statements, clippy::allow_attributes)]
+unsafe fn parse_raw_impl(filename: &str, buffer: &mut [u8], source_len: u32) {
+    // Check buffer has expected size and alignment
+    assert_eq!(buffer.len(), BUFFER_SIZE);
+    let buffer_ptr = ptr::from_mut(buffer).cast::<u8>();
+    assert!((buffer_ptr as usize).is_multiple_of(BUFFER_ALIGN));
+
+    // Get offsets and size of data region to be managed by arena allocator.
+    // Leave space for source before it, and space for metadata after it.
+    // Metadata actually only takes 5 bytes, but round everything up to multiple of 16,
+    // as `bumpalo` requires that alignment.
+    const RAW_METADATA_SIZE: usize = size_of::<RawTransferMetadata>();
+    const {
+        assert!(RAW_METADATA_SIZE >= BUMP_ALIGN);
+        assert!(RAW_METADATA_SIZE.is_multiple_of(BUMP_ALIGN));
+    };
+    let source_len = source_len as usize;
+    let data_offset = source_len.next_multiple_of(BUMP_ALIGN);
+    let data_size = (BUFFER_SIZE - RAW_METADATA_SIZE).saturating_sub(data_offset);
+    assert!(data_size >= Allocator::RAW_MIN_SIZE, "Source text is too long");
+
+    // Create `Allocator`.
+    // Wrap in `ManuallyDrop` so the allocation doesn't get freed at end of function, or if panic.
+    // SAFETY: `data_offset` is less than `buffer.len()`, so `.add(data_offset)` cannot wrap
+    // or be out of bounds.
+    let data_ptr = unsafe { buffer_ptr.add(data_offset) };
+    debug_assert!((data_ptr as usize).is_multiple_of(BUMP_ALIGN));
+    debug_assert!(data_size.is_multiple_of(BUMP_ALIGN));
+    // SAFETY: `data_ptr` and `data_size` outline a section of the memory in `buffer`.
+    // `data_ptr` and `data_size` are multiples of 16.
+    // `data_size` is at least `Allocator::RAW_MIN_SIZE` (asserted above).
+    let allocator =
+        unsafe { Allocator::from_raw_parts(NonNull::new_unchecked(data_ptr), data_size) };
+    let allocator = ManuallyDrop::new(allocator);
+
+    // Parse source.
+    // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator` exist after this.
+    let source_type = SourceType::from_path(filename).unwrap_or_default();
+
+    let program_offset = {
+        // SAFETY: We checked above that `source_len` does not exceed length of buffer
+        let source_text = unsafe { buffer.get_unchecked(..source_len) };
+        // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
+        let source_text = unsafe { str::from_utf8_unchecked(source_text) };
+
+        // Parse with same options as linter
+        let parser_ret = Parser::new(&allocator, source_text, source_type)
+            .with_options(ParseOptions {
+                parse_regular_expression: true,
+                allow_return_outside_function: true,
+                ..ParseOptions::default()
+            })
+            .parse();
+        let program = allocator.alloc(parser_ret.program);
+
+        // Check for semantic errors
+        let semantic_ret = SemanticBuilder::new().with_check_syntax_error(true).build(program);
+
+        if !parser_ret.errors.is_empty() || !semantic_ret.errors.is_empty() {
+            // Parsing failed. Return sentinel value to indicate this.
+            PARSE_FAIL_SENTINEL
+        } else {
+            // Convert spans to UTF-16
+            let span_converter = Utf8ToUtf16::new(source_text);
+            span_converter.convert_program(program);
+            span_converter.convert_comments(&mut program.comments);
+
+            // Return offset of `Program` within buffer (bottom 32 bits of pointer)
+            ptr::from_ref(program) as u32
+        }
+    };
+
+    // Write metadata (containing `Program` offset, or failure sentinel) into end of buffer
+    #[allow(clippy::cast_possible_truncation)]
+    let metadata = RawTransferMetadata::new(program_offset);
+    const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
+    const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
+    // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
+    // `RAW_METADATA_OFFSET` is aligned on 16.
+    #[expect(clippy::cast_ptr_alignment)]
+    unsafe {
+        buffer_ptr.add(RAW_METADATA_OFFSET).cast::<RawTransferMetadata>().write(metadata);
+    }
+}
diff --git a/apps/oxlint/src/run.rs b/apps/oxlint/src/run.rs
index 526c39a65cd3d..ff572e0f478d9 100644
--- a/apps/oxlint/src/run.rs
+++ b/apps/oxlint/src/run.rs
@@ -117,3 +117,12 @@ async fn lint_impl(
 
     CliRunner::new(command, external_linter).run(&mut stdout)
 }
+
+#[cfg(all(target_pointer_width = "64", target_endian = "little"))]
+pub use crate::js_plugins::parse::{get_buffer_offset, parse_raw_sync};
+
+/// Returns `true` if raw transfer is supported on this platform.
+#[napi]
+pub fn raw_transfer_supported() -> bool {
+    cfg!(all(target_pointer_width = "64", target_endian = "little"))
+}
diff --git a/crates/oxc_linter/src/lib.rs b/crates/oxc_linter/src/lib.rs
index fe1900dd6068e..31be8b8f56310 100644
--- a/crates/oxc_linter/src/lib.rs
+++ b/crates/oxc_linter/src/lib.rs
@@ -554,7 +554,7 @@ impl Linter {
 /// Any changes made here also need to be made there.
 /// `oxc_ast_tools` checks that the 2 copies are identical.
 #[ast]
-struct RawTransferMetadata2 {
+pub struct RawTransferMetadata2 {
     /// Offset of `Program` within buffer.
     /// Note: In `RawTransferMetadata` (in `napi/parser`), this field is offset of `RawTransferData`,
     /// but here it's offset of `Program`.
@@ -565,7 +565,7 @@ struct RawTransferMetadata2 {
     pub(crate) _padding: u64,
 }
 
-use RawTransferMetadata2 as RawTransferMetadata;
+pub use RawTransferMetadata2 as RawTransferMetadata;
 
 impl RawTransferMetadata {
     pub fn new(data_offset: u32) -> Self {