Skip to content

Commit fbcbc44

Browse files
purefunctor and claude committed
Move module name parsing from JS regex to WASM parser
The previous approach used a fragile regex to extract module names JS-side before registration. This refactor instead passes the tar path and source to WASM, where the actual PureScript parser extracts the module name robustly.

- Add RawModule type (path + source before parsing)
- Add path field to PackageModule as stable identifier
- Replace register_module with register_source in WASM
- register_source returns parsed module name or None on failure

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 9de0de8 commit fbcbc44

File tree

6 files changed

+50
-40
lines changed

6 files changed

+50
-40
lines changed

docs/src/lib/packages/fetcher.ts

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import pako from "pako";
2-
import type { PackageModule, PackageSet } from "./types";
2+
import type { RawModule, PackageSet } from "./types";
33

44
const REGISTRY_URL = "https://packages.registry.purescript.org";
55
const PACKAGE_SET_URL =
@@ -53,20 +53,11 @@ function parseTar(data: Uint8Array): Map<string, string> {
5353
return files;
5454
}
5555

56-
/**
57-
* Extract module name from PureScript source.
58-
* Parses the "module X.Y.Z where" declaration.
59-
*/
60-
function extractModuleName(source: string): string | null {
61-
const match = source.match(/^\s*module\s+([\w.]+)/m);
62-
return match ? match[1] : null;
63-
}
64-
6556
export async function fetchPackage(
6657
packageName: string,
6758
version: string,
6859
onProgress?: (progress: number) => void
69-
): Promise<PackageModule[]> {
60+
): Promise<RawModule[]> {
7061
// Strip 'v' prefix from version for registry URL
7162
const versionNum = version.startsWith("v") ? version.slice(1) : version;
7263
const url = `${REGISTRY_URL}/${packageName}/${versionNum}.tar.gz`;
@@ -108,13 +99,10 @@ export async function fetchPackage(
10899
// Extract .purs files
109100
const files = parseTar(tarData);
110101

111-
// Convert to modules
112-
const modules: PackageModule[] = [];
113-
for (const [, source] of files) {
114-
const moduleName = extractModuleName(source);
115-
if (moduleName) {
116-
modules.push({ name: moduleName, source });
117-
}
102+
// Convert to raw modules (path + source, no module name parsing)
103+
const modules: RawModule[] = [];
104+
for (const [path, source] of files) {
105+
modules.push({ path, source });
118106
}
119107

120108
return modules;

docs/src/lib/packages/types.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,16 @@ export interface PackageSetEntry {
77

88
export type PackageSet = Record<string, PackageSetEntry>;
99

10-
// Internal state
10+
// Raw module data from tar extraction (before WASM parsing)
11+
export interface RawModule {
12+
path: string; // tar path, e.g., "prelude-6.0.1/src/Data/Maybe.purs"
13+
source: string; // PureScript source code
14+
}
15+
16+
// Internal state (after WASM parsing extracts module name)
1117
export interface PackageModule {
12-
name: string; // e.g., "Data.Maybe"
18+
path: string; // tar path, e.g., "prelude-6.0.1/src/Data/Maybe.purs"
19+
name: string; // module name returned from WASM, e.g., "Data.Maybe"
1320
source: string; // PureScript source code
1421
}
1522

docs/src/lib/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,5 @@ export interface Lib {
6666
onProgress: (progress: PackageLoadProgress) => void
6767
): Promise<LoadedPackage[]>;
6868
clearPackages(): Promise<void>;
69-
registerModule(moduleName: string, source: string): Promise<void>;
69+
registerModule(path: string, source: string): Promise<string | undefined>;
7070
}

docs/src/wasm/src/engine.rs

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,30 @@ impl WasmQueryEngine {
8484
}
8585
}
8686

87-
/// Register an external module (from a package).
88-
/// Returns the FileId for the module.
89-
pub fn register_external_module(&mut self, module_name: &str, source: &str) -> FileId {
90-
let path = format!("pkg://registry/{module_name}.purs");
91-
let id = self.files.borrow_mut().insert(path.as_str(), source);
92-
87+
/// Register an external module source, parsing the module name from source.
88+
/// Returns the parsed module name on success, or None if parsing fails.
89+
pub fn register_external_source(&mut self, path: &str, source: &str) -> Option<String> {
90+
// 1. Insert file into VFS → FileId
91+
let virtual_path = format!("pkg://registry/{path}");
92+
let id = self.files.borrow_mut().insert(virtual_path.as_str(), source);
93+
94+
// 2. Set content in input storage
9395
self.input.borrow_mut().content.insert(id, Arc::from(source));
9496

95-
let name_id = self.interned.borrow_mut().module.intern(module_name);
97+
// 3. Parse (using cached query infrastructure)
98+
let (parsed, _) = self.parsed(id).ok()?;
99+
100+
// 4. Extract module name
101+
let module_name = parsed.module_name()?;
102+
103+
// 5. Register module name → FileId mapping
104+
let name_id = self.interned.borrow_mut().module.intern(&module_name);
96105
self.input.borrow_mut().module.insert(name_id, id);
97106

107+
// Track for cleanup
98108
self.external_ids.push(id);
99-
id
109+
110+
Some(module_name.to_string())
100111
}
101112

102113
/// Clear all external modules (packages), keeping Prim and user modules.

docs/src/wasm/src/lib.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -314,12 +314,11 @@ pub fn check(source: &str) -> JsValue {
314314
serde_wasm_bindgen::to_value(&result).unwrap()
315315
}
316316

317-
/// Register an external module (from a package) with the engine.
317+
/// Register an external module source, parsing the module name from source.
318+
/// Returns the parsed module name on success, or undefined if parsing fails.
318319
#[wasm_bindgen]
319-
pub fn register_module(module_name: &str, source: &str) {
320-
ENGINE.with_borrow_mut(|engine| {
321-
engine.register_external_module(module_name, source);
322-
});
320+
pub fn register_source(path: &str, source: &str) -> Option<String> {
321+
ENGINE.with_borrow_mut(|engine| engine.register_external_source(path, source))
323322
}
324323

325324
/// Clear all external modules (packages), keeping Prim and user modules.

docs/src/worker/docs-lib.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import init, * as docsLib from "docs-lib";
33
import type { ParseResult, CheckResult } from "../lib/types";
44
import type {
55
PackageSet,
6+
PackageModule,
67
LoadedPackage,
78
PackageLoadProgress,
89
PackageStatus,
@@ -60,17 +61,21 @@ const lib = {
6061
onProgress({ ...progress, packages: new Map(progress.packages) });
6162

6263
try {
63-
const modules = await fetchPackage(pkgName, entry.version, (p) => {
64+
const rawModules = await fetchPackage(pkgName, entry.version, (p) => {
6465
progress.packages.set(pkgName, { state: "downloading", progress: p });
6566
onProgress({ ...progress, packages: new Map(progress.packages) });
6667
});
6768

6869
progress.packages.set(pkgName, { state: "extracting" });
6970
onProgress({ ...progress, packages: new Map(progress.packages) });
7071

71-
// Register modules with WASM engine
72-
for (const mod of modules) {
73-
docsLib.register_module(mod.name, mod.source);
72+
// Register modules with WASM engine (parses module name from source)
73+
const modules: PackageModule[] = [];
74+
for (const raw of rawModules) {
75+
const moduleName = docsLib.register_source(raw.path, raw.source);
76+
if (moduleName) {
77+
modules.push({ path: raw.path, name: moduleName, source: raw.source });
78+
}
7479
}
7580

7681
progress.packages.set(pkgName, { state: "ready", moduleCount: modules.length });
@@ -105,8 +110,8 @@ const lib = {
105110
docsLib.clear_packages();
106111
},
107112

108-
async registerModule(moduleName: string, source: string): Promise<void> {
109-
docsLib.register_module(moduleName, source);
113+
async registerModule(path: string, source: string): Promise<string | undefined> {
114+
return docsLib.register_source(path, source);
110115
},
111116
};
112117

0 commit comments

Comments
 (0)