From ea27ece8d9912b5348abbc0564baa7b8003069a9 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 18 May 2026 19:33:24 -0600 Subject: [PATCH 1/5] feat(fsharp): route .fsi files through the dedicated signature grammar The tree-sitter-fsharp package ships two distinct grammars: LANGUAGE_FSHARP for .fs / .fsx source files and LANGUAGE_SIGNATURE for .fsi signature files. Both engines previously routed all three extensions through the source grammar, so bare `val` declarations in .fsi files surfaced as ERROR nodes and yielded no symbols. This change adds a separate `fsharp-signature` language for .fsi: * native: new `FSharpSignature` LanguageKind wired to LANGUAGE_SIGNATURE * WASM: new `fsharp-signature` registry entry using tree-sitter-fsharp_signature.wasm (build script now produces it) * shared F# extractor handles `value_definition` only when its first child is the `val` keyword, distinguishing signature `val foo : type` from source `let foo = ...` * function vs variable kind is inferred from the type shape; supports both `function_type` (WASM npm 0.1.0) and `curried_spec` (cargo 0.3.0) node shapes for engine parity docs check acknowledged: README's F# row already covers .fs/.fsx/.fsi and the user-facing language count is unchanged; fsharp-signature is an internal id that mirrors how ocaml-interface backs .mli files. 
Closes #1114 --- .../codegraph-core/src/extractors/fsharp.rs | 152 ++++++++++++++++++ crates/codegraph-core/src/extractors/mod.rs | 2 +- crates/codegraph-core/src/parser_registry.rs | 14 +- scripts/build-wasm.ts | 1 + src/ast-analysis/rules/index.ts | 2 + src/domain/parser.ts | 9 +- src/domain/wasm-worker-entry.ts | 9 +- src/extractors/fsharp.ts | 65 ++++++++ src/types.ts | 1 + tests/parsers/fsharp-signature.test.ts | 63 ++++++++ 10 files changed, 311 insertions(+), 7 deletions(-) create mode 100644 tests/parsers/fsharp-signature.test.ts diff --git a/crates/codegraph-core/src/extractors/fsharp.rs b/crates/codegraph-core/src/extractors/fsharp.rs index 900084176..ec7c41a72 100644 --- a/crates/codegraph-core/src/extractors/fsharp.rs +++ b/crates/codegraph-core/src/extractors/fsharp.rs @@ -24,6 +24,7 @@ fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep "import_decl" => handle_import_decl(node, source, symbols), "application_expression" => handle_application(node, source, symbols), "dot_expression" => handle_dot_expression(node, source, symbols), + "value_definition" => handle_value_definition(node, source, symbols), _ => {} } } @@ -300,3 +301,154 @@ fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) }); } } + +/// Handle `val name : type` declarations in `.fsi` signature files. +/// +/// The signature grammar reuses the `value_definition` node kind for `val` +/// declarations, distinguished from the source grammar's `let` bindings by +/// the first child being the literal `val` keyword. Source-file +/// `value_definition` nodes (which start with `let`) are intentionally +/// ignored here to preserve `.fs` extractor parity. 
+fn handle_value_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let first = match node.child(0) { + Some(c) => c, + None => return, + }; + if first.kind() != "val" { + return; + } + + let decl_left = match find_child(node, "value_declaration_left") { + Some(n) => n, + None => return, + }; + let name = match extract_value_name(&decl_left, source) { + Some(n) => n, + None => return, + }; + + let kind = if has_function_type(node) { "function" } else { "variable" }; + let module_name = enclosing_module_name(node, source); + let qualified = match module_name { + Some(m) => format!("{}.{}", m, name), + None => name, + }; + + symbols.definitions.push(Definition { + name: qualified, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn extract_value_name(decl_left: &Node, source: &[u8]) -> Option<String> { + let pattern = find_child(decl_left, "identifier_pattern")?; + let ident = find_child(&pattern, "long_identifier_or_op") + .and_then(|n| find_child(&n, "identifier")) + .or_else(|| find_child(&pattern, "identifier"))?; + Some(node_text(&ident, source).to_string()) +} + +fn has_function_type(node: &Node) -> bool { + // The two grammar versions use different node shapes for type signatures: + // + // • WASM (tree-sitter-fsharp npm 0.1.0): `function_type` is the explicit + // function-type kind, only present for `a -> b` types. + // • Native (tree-sitter-fsharp 0.3.0): every type signature is wrapped + // in `curried_spec`. For a function it contains `arguments_spec` + // children; for a plain value (e.g. `val pi : float`) it wraps a + // single `simple_type`. + // + // Treat both engines consistently by classifying as a function whenever + // a function_type node appears OR a curried_spec contains `arguments_spec`. 
+ for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "function_type" => return true, + "curried_spec" => { + for j in 0..child.child_count() { + if let Some(g) = child.child(j) { + if g.kind() == "arguments_spec" { + return true; + } + } + } + } + _ => {} + } + } + false +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::extractors::SymbolExtractor; + use tree_sitter::Parser; + + fn parse_source(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_fsharp::LANGUAGE_FSHARP.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + FSharpExtractor.extract(&tree, code.as_bytes(), "test.fs") + } + + fn parse_signature(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_fsharp::LANGUAGE_SIGNATURE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + FSharpExtractor.extract(&tree, code.as_bytes(), "test.fsi") + } + + #[test] + fn signature_extracts_val_declarations() { + let s = parse_signature("namespace MyApp.Domain\n\nval add : int -> int -> int\nval pi : float\n"); + let add = s + .definitions + .iter() + .find(|d| d.name == "add") + .expect("val add should be extracted"); + assert_eq!(add.kind, "function"); + let pi = s + .definitions + .iter() + .find(|d| d.name == "pi") + .expect("val pi should be extracted"); + assert_eq!(pi.kind, "variable"); + } + + #[test] + fn signature_extracts_bare_val_declarations() { + let s = parse_signature("val negate : int -> int\nval count : int\n"); + assert!(s + .definitions + .iter() + .any(|d| d.name == "negate" && d.kind == "function")); + assert!(s + .definitions + .iter() + .any(|d| d.name == "count" && d.kind == "variable")); + } + + #[test] + fn source_grammar_does_not_extract_let_bindings_as_val() { + // `let x = 5` is a value_definition in the source grammar but its + // first child is `let`, not `val`. 
Our handler must not extract it + // (preserves prior `.fs` extraction parity — only function_declaration_left + // produces definitions in source files). + let s = parse_source("module M\n\nlet x = 5\n"); + assert!( + s.definitions.iter().all(|d| d.name != "x"), + "let bindings in .fs files must not be extracted as val definitions" + ); + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index d826bb8ce..35ddf11a2 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -140,7 +140,7 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } - LanguageKind::FSharp => { + LanguageKind::FSharp | LanguageKind::FSharpSignature => { fsharp::FSharpExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } LanguageKind::ObjC => { diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index cfd61fd17..5a9331790 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -28,6 +28,7 @@ pub enum LanguageKind { Ocaml, OcamlInterface, FSharp, + FSharpSignature, ObjC, Gleam, Julia, @@ -70,6 +71,7 @@ impl LanguageKind { Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", Self::FSharp => "fsharp", + Self::FSharpSignature => "fsharp-signature", Self::ObjC => "objc", Self::Gleam => "gleam", Self::Julia => "julia", @@ -120,7 +122,8 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), - "fs" | "fsx" | "fsi" => Some(Self::FSharp), + "fs" | "fsx" => Some(Self::FSharp), + "fsi" => Some(Self::FSharpSignature), "m" => Some(Self::ObjC), "gleam" => Some(Self::Gleam), "jl" => Some(Self::Julia), @@ -165,6 +168,7 @@ impl LanguageKind { "ocaml" => Some(Self::Ocaml), 
"ocaml-interface" => Some(Self::OcamlInterface), "fsharp" => Some(Self::FSharp), + "fsharp-signature" => Some(Self::FSharpSignature), "objc" => Some(Self::ObjC), "gleam" => Some(Self::Gleam), "julia" => Some(Self::Julia), @@ -207,6 +211,7 @@ impl LanguageKind { Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), Self::FSharp => tree_sitter_fsharp::LANGUAGE_FSHARP.into(), + Self::FSharpSignature => tree_sitter_fsharp::LANGUAGE_SIGNATURE.into(), Self::ObjC => tree_sitter_objc::LANGUAGE.into(), Self::Gleam => tree_sitter_gleam::LANGUAGE.into(), Self::Julia => tree_sitter_julia::LANGUAGE.into(), @@ -232,8 +237,8 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, FSharp, ObjC, Gleam, Julia, Cuda, Clojure, Erlang, Groovy, R, Solidity, - Verilog, + OcamlInterface, FSharp, FSharpSignature, ObjC, Gleam, Julia, Cuda, Clojure, Erlang, + Groovy, R, Solidity, Verilog, ] } } @@ -304,6 +309,7 @@ mod tests { | LanguageKind::Ocaml | LanguageKind::OcamlInterface | LanguageKind::FSharp + | LanguageKind::FSharpSignature | LanguageKind::ObjC | LanguageKind::Gleam | LanguageKind::Julia @@ -320,7 +326,7 @@ mod tests { // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. 
- const EXPECTED_LEN: usize = 35; + const EXPECTED_LEN: usize = 36; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/scripts/build-wasm.ts b/scripts/build-wasm.ts index c7d425573..733e4c149 100644 --- a/scripts/build-wasm.ts +++ b/scripts/build-wasm.ts @@ -206,6 +206,7 @@ const grammars = [ { name: 'tree-sitter-ocaml', pkg: 'tree-sitter-ocaml', sub: 'grammars/ocaml' }, { name: 'tree-sitter-ocaml_interface', pkg: 'tree-sitter-ocaml', sub: 'grammars/interface' }, { name: 'tree-sitter-fsharp', pkg: 'tree-sitter-fsharp', sub: 'fsharp' }, + { name: 'tree-sitter-fsharp_signature', pkg: 'tree-sitter-fsharp', sub: 'fsharp_signature' }, { name: 'tree-sitter-gleam', pkg: 'tree-sitter-gleam', sub: null }, { name: 'tree-sitter-clojure', pkg: 'tree-sitter-clojure', sub: null }, { name: 'tree-sitter-julia', pkg: 'tree-sitter-julia', sub: null }, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index ed4f8ca3f..8c8b8151c 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -233,6 +233,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], ['fsharp', FSHARP_AST_TYPES], + ['fsharp-signature', FSHARP_AST_TYPES], ['objc', OBJC_AST_TYPES], ['gleam', GLEAM_AST_TYPES], ['julia', JULIA_AST_TYPES], @@ -313,6 +314,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], ['fsharp', FSHARP_STRING_CONFIG], + ['fsharp-signature', FSHARP_STRING_CONFIG], ['objc', OBJC_STRING_CONFIG], ['gleam', GLEAM_STRING_CONFIG], ['julia', JULIA_STRING_CONFIG], diff --git a/src/domain/parser.ts b/src/domain/parser.ts index a94bbe49d..b4aaa366b 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -831,11 +831,18 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ }, { id: 'fsharp', - extensions: ['.fs', '.fsx', '.fsi'], + extensions: ['.fs', '.fsx'], grammarFile: 
'tree-sitter-fsharp.wasm', extractor: extractFSharpSymbols, required: false, }, + { + id: 'fsharp-signature', + extensions: ['.fsi'], + grammarFile: 'tree-sitter-fsharp_signature.wasm', + extractor: extractFSharpSymbols, + required: false, + }, { id: 'gleam', extensions: ['.gleam'], diff --git a/src/domain/wasm-worker-entry.ts b/src/domain/wasm-worker-entry.ts index e8359a21d..ca02bca70 100644 --- a/src/domain/wasm-worker-entry.ts +++ b/src/domain/wasm-worker-entry.ts @@ -306,11 +306,18 @@ const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ }, { id: 'fsharp', - extensions: ['.fs', '.fsx', '.fsi'], + extensions: ['.fs', '.fsx'], grammarFile: 'tree-sitter-fsharp.wasm', extractor: extractFSharpSymbols, required: false, }, + { + id: 'fsharp-signature', + extensions: ['.fsi'], + grammarFile: 'tree-sitter-fsharp_signature.wasm', + extractor: extractFSharpSymbols, + required: false, + }, { id: 'gleam', extensions: ['.gleam'], diff --git a/src/extractors/fsharp.ts b/src/extractors/fsharp.ts index ef54dd34e..e2a077109 100644 --- a/src/extractors/fsharp.ts +++ b/src/extractors/fsharp.ts @@ -57,6 +57,9 @@ function walkFSharpNode( case 'dot_expression': handleDotExpression(node, ctx); break; + case 'value_definition': + handleValueDefinition(node, ctx, currentModule); + break; } for (let i = 0; i < node.childCount; i++) { @@ -251,3 +254,65 @@ function handleDotExpression(node: TreeSitterNode, ctx: ExtractorOutput): void { ctx.calls.push(call); } } + +// Handle `val name : type` declarations in `.fsi` signature files. +// The signature grammar reuses `value_definition` for `val` bindings, +// distinguished from the source grammar's `let` bindings by the first +// child being the literal `val` keyword. Source-file `value_definition` +// nodes (which start with `let`) are intentionally ignored to preserve +// `.fs` extractor parity. 
+function handleValueDefinition( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + const first = node.child(0); + if (!first || first.type !== 'val') return; + + const declLeft = findChild(node, 'value_declaration_left'); + if (!declLeft) return; + + const pattern = findChild(declLeft, 'identifier_pattern'); + if (!pattern) return; + + const ident = + findChild(findChild(pattern, 'long_identifier_or_op') ?? pattern, 'identifier') ?? + findChild(pattern, 'identifier'); + if (!ident) return; + + // The two grammar versions use different shapes for type signatures: + // • WASM (npm tree-sitter-fsharp 0.1.0): `function_type` is the explicit + // function-type kind, only present for `a -> b` types. + // • Native (cargo tree-sitter-fsharp 0.3.0): every type signature is + // wrapped in `curried_spec`. For a function it contains `arguments_spec` + // children; for a plain value (e.g. `val pi : float`) it wraps a single + // `simple_type`. + // Treat both engines consistently by classifying as a function whenever + // function_type appears OR a curried_spec contains an arguments_spec child. + let hasFunctionType = false; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (!c) continue; + if (c.type === 'function_type') { + hasFunctionType = true; + break; + } + if (c.type === 'curried_spec') { + for (let j = 0; j < c.childCount; j++) { + if (c.child(j)?.type === 'arguments_spec') { + hasFunctionType = true; + break; + } + } + if (hasFunctionType) break; + } + } + + const name = currentModule ? `${currentModule}.${ident.text}` : ident.text; + ctx.definitions.push({ + name, + kind: hasFunctionType ? 
'function' : 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} diff --git a/src/types.ts b/src/types.ts index 8e4bf13ca..542b81bc0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -99,6 +99,7 @@ export type LanguageId = | 'ocaml' | 'ocaml-interface' | 'fsharp' + | 'fsharp-signature' | 'gleam' | 'clojure' | 'julia' diff --git a/tests/parsers/fsharp-signature.test.ts b/tests/parsers/fsharp-signature.test.ts new file mode 100644 index 000000000..02e8ffa2d --- /dev/null +++ b/tests/parsers/fsharp-signature.test.ts @@ -0,0 +1,63 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractFSharpSymbols } from '../../src/domain/parser.js'; + +describe('F# signature (.fsi) parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseFSi(code: string) { + const parser = parsers.get('fsharp-signature'); + if (!parser) throw new Error('F# signature parser not available'); + const tree = parser.parse(code); + return { tree, symbols: extractFSharpSymbols(tree, 'test.fsi') }; + } + + it('parses bare val declarations without ERROR nodes', () => { + // The main F# grammar produces ERROR nodes for `val` declarations + // (#1114); the signature grammar parses them as `value_definition`. 
+ const { tree, symbols } = parseFSi( + `namespace MyApp.Domain\n\nval add : int -> int -> int\nval pi : float\n`, + ); + expect(tree.rootNode.hasError).toBe(false); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'add', kind: 'function' }), + ); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'pi', kind: 'variable' }), + ); + }); + + it('extracts bare top-level val declarations', () => { + const { tree, symbols } = parseFSi(`val negate : int -> int\nval count : int\n`); + expect(tree.rootNode.hasError).toBe(false); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'negate', kind: 'function' }), + ); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'count', kind: 'variable' }), + ); + }); + + it('extracts val declarations nested inside a module signature', () => { + // The signature grammar requires `=` after the module name for nested + // module signatures (`module Foo = ...`), unlike F# source files. + const { tree, symbols } = parseFSi(`module Foo =\n val add : int -> int\n`); + expect(tree.rootNode.hasError).toBe(false); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'add', kind: 'function' }), + ); + }); + + it('does not crash when the grammar produces ERROR nodes for unsupported constructs', () => { + // `open` at the namespace top level is not handled by the upstream + // signature grammar v0.3.0 — it produces ERROR nodes but val + // declarations still recover via the parser's error recovery. 
+ const { symbols } = parseFSi(`namespace X\n\nopen System\n\nval read : string -> string\n`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'read', kind: 'function' }), + ); + }); +}); From 9cfee7bebe468cf1ad83c1fe1cac7e5b036dd2d1 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 19 May 2026 02:08:15 -0600 Subject: [PATCH 2/5] fix(fsharp): qualify val declarations inside nested signature modules (#1162) Greptile review caught two .fsi extraction corners: 1. **Module qualification dropped for `val` inside `module Foo = ...`.** The cargo 0.3.0 signature grammar wraps nested signature modules in a `module_defn` node (distinct from `named_module`), so the existing `enclosing_module_name` walk never reached it and `val add : int -> int` was indexed as `add` instead of `Foo.add`. Both engines now handle `module_defn`, emit it as a `module` definition with the dotted parent path, and qualify nested `val` declarations accordingly. The WASM 0.1.0 signature grammar still emits ERROR nodes for the same construct, so the WASM-only test continues to assert `add` (with an explicit comment pointing at the grammar bump tracked under #1161). 2. **`val mutable count: int = 0` in `.fs` source files.** Empirically confirmed in both engines that the source grammar parses this as a `member_defn` node (NOT a `value_definition`), so the new `val`-style handler never sees it. Added regression tests in both engines so a future grammar change cannot silently start mis-classifying class fields as variables. 
--- .../codegraph-core/src/extractors/fsharp.rs | 101 +++++++++++++++++- src/extractors/fsharp.ts | 30 ++++++ tests/parsers/fsharp-signature.test.ts | 12 ++- tests/parsers/fsharp.test.ts | 13 +++ 4 files changed, 148 insertions(+), 8 deletions(-) diff --git a/crates/codegraph-core/src/extractors/fsharp.rs b/crates/codegraph-core/src/extractors/fsharp.rs index ec7c41a72..9b3aea5c5 100644 --- a/crates/codegraph-core/src/extractors/fsharp.rs +++ b/crates/codegraph-core/src/extractors/fsharp.rs @@ -19,6 +19,7 @@ impl SymbolExtractor for FSharpExtractor { fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { "named_module" => handle_named_module(node, source, symbols), + "module_defn" => handle_module_defn(node, source, symbols), "function_declaration_left" => handle_function_decl(node, source, symbols), "type_definition" => handle_type_def(node, source, symbols), "import_decl" => handle_import_decl(node, source, symbols), @@ -29,11 +30,40 @@ fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep } } -/// Find the enclosing `named_module` and return its identifier text. +/// Find the enclosing module name, walking up through any number of +/// `module_defn` (nested signature modules) wrappers before reaching the +/// top-level `named_module`. Returns the dotted path, e.g. `Outer.Inner`. +/// +/// Source files use `named_module` for the top-level `module M = …` and +/// the signature grammar (cargo 0.3.0) wraps nested signature modules in +/// `module_defn` nodes. WASM signature grammar (npm 0.1.0) currently +/// produces ERROR nodes for nested signature modules so we cannot recover +/// qualification there — tracked under #1161. 
fn enclosing_module_name(node: &Node, source: &[u8]) -> Option<String> { - let module = find_parent_of_type(node, "named_module")?; - let id = find_child(&module, "long_identifier")?; - Some(node_text(&id, source).to_string()) + let mut parts: Vec<String> = Vec::new(); + let mut current = node.parent(); + while let Some(p) = current { + match p.kind() { + "module_defn" => { + if let Some(id) = find_child(&p, "identifier") { + parts.push(node_text(&id, source).to_string()); + } + } + "named_module" => { + if let Some(id) = find_child(&p, "long_identifier") { + parts.push(node_text(&id, source).to_string()); + } + break; + } + _ => {} + } + current = p.parent(); + } + if parts.is_empty() { + return None; + } + parts.reverse(); + Some(parts.join(".")) } fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) { @@ -53,6 +83,36 @@ fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }); } +/// Handle nested signature modules (`module Foo = ...`) emitted by the +/// cargo 0.3.0 grammar as `module_defn`. Emits a `module` definition with +/// the dotted parent path (e.g. `Outer.Foo`) and lets the DFS walker +/// continue into child `val` declarations, which pick up the same path via +/// `enclosing_module_name`. +fn handle_module_defn(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match find_child(node, "identifier") { + Some(n) => n, + None => return, + }; + let raw = node_text(&name_node, source).to_string(); + // `enclosing_module_name` walks `node.parent()` upward, so calling it on + // the `module_defn` itself yields the dotted prefix of its enclosing + // module(s) without including this module's own name. 
+ let qualified = match enclosing_module_name(node, source) { + Some(prefix) if !prefix.is_empty() => format!("{}.{}", prefix, raw), + _ => raw, + }; + symbols.definitions.push(Definition { + name: qualified, + kind: "module".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { // function_declaration_left: first child is the function name identifier, // followed by argument_patterns. @@ -451,4 +511,37 @@ mod tests { "let bindings in .fs files must not be extracted as val definitions" ); } + + #[test] + fn signature_qualifies_val_inside_nested_module_defn() { + // The cargo 0.3.0 signature grammar wraps `module Foo = ...` as a + // `module_defn` node (the WASM 0.1.0 grammar emits ERROR for this + // construct — tracked under #1161). The `val` declarations inside + // must be qualified with the module path. + let s = parse_signature("namespace X\n\nmodule Foo =\n val add : int -> int\n"); + assert!( + s.definitions.iter().any(|d| d.name == "Foo.add" && d.kind == "function"), + "val add nested under `module Foo =` must be indexed as `Foo.add`, got: {:?}", + s.definitions.iter().map(|d| &d.name).collect::<Vec<_>>(), + ); + assert!( + s.definitions.iter().any(|d| d.name == "Foo" && d.kind == "module"), + "module Foo must be indexed as a module definition" + ); + } + + #[test] + fn source_grammar_does_not_extract_val_mutable_class_fields() { + // `val mutable count: int = 0` inside a class is parsed as a `member_defn` + // node in the source grammar — NOT a `value_definition` — so our + // `value_definition`/`val`-first-child handler does not see it. + // This regression guard makes that empirical fact explicit. 
+ let s = parse_source( + "module M\n\ntype C() =\n val mutable count: int = 0\n", + ); + assert!( + s.definitions.iter().all(|d| d.name != "count"), + "val mutable class fields must not be extracted by the signature value_definition handler" + ); + } } diff --git a/src/extractors/fsharp.ts b/src/extractors/fsharp.ts index e2a077109..45a02cf78 100644 --- a/src/extractors/fsharp.ts +++ b/src/extractors/fsharp.ts @@ -42,6 +42,15 @@ function walkFSharpNode( case 'named_module': nextModule = handleNamedModule(node, ctx); break; + case 'module_defn': + // Nested signature module (`module Foo = ...`) in `.fsi` files, + // produced by the cargo 0.3.0 grammar. The WASM 0.1.0 signature + // grammar currently emits ERROR nodes for this construct, so it + // never reaches this branch there (tracked in #1161). When it + // does fire, accumulate the dotted module path so nested `val` + // declarations are qualified as `Outer.Inner.foo`. + nextModule = handleModuleDefn(node, ctx, currentModule); + break; case 'function_declaration_left': handleFunctionDecl(node, ctx, currentModule); break; @@ -82,6 +91,27 @@ function handleNamedModule(node: TreeSitterNode, ctx: ExtractorOutput): string | return nameNode.text; } +function handleModuleDefn( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): string | null { + // `module_defn` (cargo 0.3.0 signature grammar) wraps `module Foo = ...` + // sections inside an outer `namespace` or another module. The name is a + // direct `identifier` child. + const nameNode = findChild(node, 'identifier'); + if (!nameNode) return currentModule; + + const qualified = currentModule ? 
`${currentModule}.${nameNode.text}` : nameNode.text; + ctx.definitions.push({ + name: qualified, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + return qualified; +} + function handleFunctionDecl( node: TreeSitterNode, ctx: ExtractorOutput, diff --git a/tests/parsers/fsharp-signature.test.ts b/tests/parsers/fsharp-signature.test.ts index 02e8ffa2d..f6cff46c5 100644 --- a/tests/parsers/fsharp-signature.test.ts +++ b/tests/parsers/fsharp-signature.test.ts @@ -42,10 +42,14 @@ describe('F# signature (.fsi) parser', () => { }); it('extracts val declarations nested inside a module signature', () => { - // The signature grammar requires `=` after the module name for nested - // module signatures (`module Foo = ...`), unlike F# source files. - const { tree, symbols } = parseFSi(`module Foo =\n val add : int -> int\n`); - expect(tree.rootNode.hasError).toBe(false); + // The WASM tree-sitter-fsharp 0.1.0 signature grammar does NOT produce a + // `module_defn` for `module Foo = ...` — it emits ERROR nodes and the + // `val` declarations float to the top level (so they're indexed as + // `add`, not `Foo.add`). The cargo 0.3.0 grammar parses it correctly + // and the Rust extractor qualifies as `Foo.add`. Grammar version skew + // is tracked under #1161; once npm bumps to 0.3.0+ this test should + // assert `Foo.add` to match the native engine. 
+ const { symbols } = parseFSi(`module Foo =\n val add : int -> int\n`); expect(symbols.definitions).toContainEqual( expect.objectContaining({ name: 'add', kind: 'function' }), ); diff --git a/tests/parsers/fsharp.test.ts b/tests/parsers/fsharp.test.ts index 4957617b8..59c2f5473 100644 --- a/tests/parsers/fsharp.test.ts +++ b/tests/parsers/fsharp.test.ts @@ -49,4 +49,17 @@ open System.IO`); const symbols = parseFSharp(`let result = List.map (fun x -> x + 1) [1; 2; 3]`); expect(symbols.calls.length).toBeGreaterThanOrEqual(1); }); + + it('does not extract `val mutable` class fields as definitions', () => { + // `val mutable count: int = 0` inside a class is parsed as `member_defn` + // in the source grammar — NOT `value_definition` — so the signature + // `val`-style handler must not produce a definition for it. + const symbols = parseFSharp(`module M + +type C() = + val mutable count: int = 0 +`); + expect(symbols.definitions.find((d) => d.name === 'count')).toBeUndefined(); + expect(symbols.definitions.find((d) => d.name === 'M.count')).toBeUndefined(); + }); }); From adcaf402fb118f98c840fc9e6ed3411960d8a60a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 19 May 2026 17:11:09 -0600 Subject: [PATCH 3/5] chore(fsharp): align npm grammar with cargo at v0.3.0 (#1165) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(fsharp): align npm grammar with cargo at v0.3.0 The WASM engine pulled tree-sitter-fsharp 0.1.0 from npm while the native engine used 0.3.0 from crates.io. The two versions diverged in how they parse type signatures in .fsi files: 0.1.0 emits `function_type` nodes for `a -> b` types, while 0.3.0 wraps every signature in `curried_spec` with `arguments_spec` children for function shapes. The F# extractor was forced to detect both shapes simultaneously, which is fragile — future grammar churn could silently desync further. 
* package.json now installs tree-sitter-fsharp from the ionide v0.3.0 GitHub tarball (npm has no 0.3.0 release; ionide is the upstream the cargo crate also tracks). Lockfile pins via SRI hash. * Both extractors now check only `curried_spec` → `arguments_spec`, removing the dead `function_type` branch from each. docs check acknowledged: README's F# row already covers .fs/.fsx/.fsi and the user-facing language count is unchanged; the grammar version is an internal implementation detail. Closes #1161 * docs(fsharp): explain tree-sitter-fsharp tarball pin (#1165) --- .../codegraph-core/src/extractors/fsharp.rs | 32 ++++----------- package-lock.json | 14 +++---- package.json | 2 +- src/extractors/fsharp.ts | 39 ++++++++----------- 4 files changed, 32 insertions(+), 55 deletions(-) diff --git a/crates/codegraph-core/src/extractors/fsharp.rs b/crates/codegraph-core/src/extractors/fsharp.rs index 9b3aea5c5..1e68c46db 100644 --- a/crates/codegraph-core/src/extractors/fsharp.rs +++ b/crates/codegraph-core/src/extractors/fsharp.rs @@ -415,31 +415,15 @@ fn extract_value_name(decl_left: &Node, source: &[u8]) -> Option { } fn has_function_type(node: &Node) -> bool { - // The two grammar versions use different node shapes for type signatures: - // - // • WASM (tree-sitter-fsharp npm 0.1.0): `function_type` is the explicit - // function-type kind, only present for `a -> b` types. - // • Native (tree-sitter-fsharp 0.3.0): every type signature is wrapped - // in `curried_spec`. For a function it contains `arguments_spec` - // children; for a plain value (e.g. `val pi : float`) it wraps a - // single `simple_type`. - // - // Treat both engines consistently by classifying as a function whenever - // a function_type node appears OR a curried_spec contains `arguments_spec`. 
- for i in 0..node.child_count() { - let Some(child) = node.child(i) else { continue }; - match child.kind() { - "function_type" => return true, - "curried_spec" => { - for j in 0..child.child_count() { - if let Some(g) = child.child(j) { - if g.kind() == "arguments_spec" { - return true; - } - } - } + // The grammar wraps every type signature in `curried_spec`. A function type + // (e.g. `val add : int -> int -> int`) contains one or more `arguments_spec` + // children; a plain value (e.g. `val pi : float`) wraps a single `simple_type`. + let Some(curried) = find_child(node, "curried_spec") else { return false }; + for i in 0..curried.child_count() { + if let Some(child) = curried.child(i) { + if child.kind() == "arguments_spec" { + return true; } - _ => {} } } false diff --git a/package-lock.json b/package-lock.json index 48254d970..7c2cf2a22 100644 --- a/package-lock.json +++ b/package-lock.json @@ -39,7 +39,7 @@ "tree-sitter-dart": "^1.0.0", "tree-sitter-elixir": "^0.3.5", "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", - "tree-sitter-fsharp": "^0.1.0", + "tree-sitter-fsharp": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz", "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", "tree-sitter-groovy": "^0.1.2", @@ -7445,18 +7445,18 @@ } }, "node_modules/tree-sitter-fsharp": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/tree-sitter-fsharp/-/tree-sitter-fsharp-0.1.0.tgz", - "integrity": "sha512-TCK+Jkg3qvEe4o6JxqUlE+vUc9lWhHhD7Toglu5Y04/PKa9DgACzqU5Jp9BYZnyhgGLEe+30kVOyFTY/iC/n1Q==", + "version": "0.3.0", + "resolved": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz", + "integrity": "sha512-fXN3uk3m9PaOK5MrylouFYvLsu5LbY64oyB0iTlk17fQ290XfX/arrhfGYIhiscaC6fVsoxv2MS0pOJPFzDTxw==", "dev": true, "hasInstallScript": true, "license": "MIT", "dependencies": { - "node-addon-api": "^8.1.0", - "node-gyp-build": "^4.8.2" + "node-addon-api": 
"^8.3.1", + "node-gyp-build": "^4.8.4" }, "peerDependencies": { - "tree-sitter": "^0.21.0" + "tree-sitter": "^0.22.4" }, "peerDependenciesMeta": { "tree_sitter": { diff --git a/package.json b/package.json index 1df8cb376..8f95159f5 100644 --- a/package.json +++ b/package.json @@ -162,7 +162,7 @@ "tree-sitter-dart": "^1.0.0", "tree-sitter-elixir": "^0.3.5", "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", - "tree-sitter-fsharp": "^0.1.0", + "tree-sitter-fsharp": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz", "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", "tree-sitter-groovy": "^0.1.2", diff --git a/src/extractors/fsharp.ts b/src/extractors/fsharp.ts index 45a02cf78..7dc1e6163 100644 --- a/src/extractors/fsharp.ts +++ b/src/extractors/fsharp.ts @@ -10,6 +10,13 @@ import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from F# files. * + * Grammar source: `tree-sitter-fsharp` v0.3.0 installed via a pinned GitHub + * tarball in `package.json` because the ionide/tree-sitter-fsharp project has + * no v0.3.0 release published to the npm registry. The cargo crate the native + * engine uses is also v0.3.0; both engines must stay aligned. Upgrading + * requires a manual edit of the tarball URL in `package.json` and + * `package-lock.json` — `npm update` will not bump this entry. + * * tree-sitter-fsharp grammar notes: * - named_module: top-level module declaration * - function_declaration_left: LHS of `let name params = ...` @@ -310,31 +317,17 @@ function handleValueDefinition( findChild(pattern, 'identifier'); if (!ident) return; - // The two grammar versions use different shapes for type signatures: - // • WASM (npm tree-sitter-fsharp 0.1.0): `function_type` is the explicit - // function-type kind, only present for `a -> b` types. - // • Native (cargo tree-sitter-fsharp 0.3.0): every type signature is - // wrapped in `curried_spec`. 
For a function it contains `arguments_spec` - // children; for a plain value (e.g. `val pi : float`) it wraps a single - // `simple_type`. - // Treat both engines consistently by classifying as a function whenever - // function_type appears OR a curried_spec contains an arguments_spec child. + // The grammar wraps every type signature in `curried_spec`. A function type + // (e.g. `val add : int -> int -> int`) contains one or more `arguments_spec` + // children; a plain value (e.g. `val pi : float`) wraps a single `simple_type`. + const curriedSpec = findChild(node, 'curried_spec'); let hasFunctionType = false; - for (let i = 0; i < node.childCount; i++) { - const c = node.child(i); - if (!c) continue; - if (c.type === 'function_type') { - hasFunctionType = true; - break; - } - if (c.type === 'curried_spec') { - for (let j = 0; j < c.childCount; j++) { - if (c.child(j)?.type === 'arguments_spec') { - hasFunctionType = true; - break; - } + if (curriedSpec) { + for (let i = 0; i < curriedSpec.childCount; i++) { + if (curriedSpec.child(i)?.type === 'arguments_spec') { + hasFunctionType = true; + break; } - if (hasFunctionType) break; } } From c12bb6029753b18791ba169f75e120129a79c889 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 19 May 2026 20:08:38 -0600 Subject: [PATCH 4/5] fix(fsharp): restore dual function_type/curried_spec detection for val (#1162) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The npm and cargo tree-sitter-fsharp 0.3.0 grammars — though sharing a version tag — still emit type signatures with different node shapes: WASM 0.3.0 produces `function_type` directly under `value_definition`, while cargo 0.3.0 wraps every signature in `curried_spec` with `arguments_spec` children for function types. 
#1165 removed the `function_type` branch on the assumption that both grammars had converged at v0.3.0, which broke WASM extraction: every `val name : a -> b` declaration was being indexed as a `variable` instead of a `function`. Restore the dual-shape detection in the TypeScript extractor and update the documentation accordingly. Also clarifies the nested-module test comment in fsharp-signature.test to reflect that the WASM signature grammar is now at v0.3.0 but still emits ERROR nodes for `module Foo = ...` (the fix is still pending, tracked under #1161). --- .../codegraph-core/src/extractors/fsharp.rs | 6 ++-- src/extractors/fsharp.ts | 35 ++++++++++++++----- tests/parsers/fsharp-signature.test.ts | 15 ++++---- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/crates/codegraph-core/src/extractors/fsharp.rs b/crates/codegraph-core/src/extractors/fsharp.rs index 1e68c46db..752ffb05f 100644 --- a/crates/codegraph-core/src/extractors/fsharp.rs +++ b/crates/codegraph-core/src/extractors/fsharp.rs @@ -36,9 +36,9 @@ fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep /// /// Source files use `named_module` for the top-level `module M = …` and /// the signature grammar (cargo 0.3.0) wraps nested signature modules in -/// `module_defn` nodes. WASM signature grammar (npm 0.1.0) currently -/// produces ERROR nodes for nested signature modules so we cannot recover -/// qualification there — tracked under #1161. +/// `module_defn` nodes. The WASM signature grammar currently emits ERROR +/// nodes for nested signature modules so we cannot recover qualification +/// there — tracked under #1161. 
fn enclosing_module_name(node: &Node, source: &[u8]) -> Option { let mut parts: Vec = Vec::new(); let mut current = node.parent(); diff --git a/src/extractors/fsharp.ts b/src/extractors/fsharp.ts index 7dc1e6163..7c3b924c9 100644 --- a/src/extractors/fsharp.ts +++ b/src/extractors/fsharp.ts @@ -317,17 +317,34 @@ function handleValueDefinition( findChild(pattern, 'identifier'); if (!ident) return; - // The grammar wraps every type signature in `curried_spec`. A function type - // (e.g. `val add : int -> int -> int`) contains one or more `arguments_spec` - // children; a plain value (e.g. `val pi : float`) wraps a single `simple_type`. - const curriedSpec = findChild(node, 'curried_spec'); + // The npm and cargo tree-sitter-fsharp 0.3.0 grammars — though sharing a + // version tag — emit type signatures with different node shapes: + // • WASM (npm 0.3.0 ionide tarball): `function_type` is the explicit + // function-type kind, present as a direct child of `value_definition` + // for `a -> b` types; plain values (e.g. `val pi : float`) appear as + // `simple_type`. + // • Native (cargo 0.3.0): every type signature is wrapped in + // `curried_spec`. A function type contains one or more `arguments_spec` + // children; a plain value wraps a single `simple_type`. + // Classify as a function whenever `function_type` appears OR a + // `curried_spec` contains an `arguments_spec` child, so both engines stay + // in parity until the grammars converge. 
let hasFunctionType = false; - if (curriedSpec) { - for (let i = 0; i < curriedSpec.childCount; i++) { - if (curriedSpec.child(i)?.type === 'arguments_spec') { - hasFunctionType = true; - break; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (!c) continue; + if (c.type === 'function_type') { + hasFunctionType = true; + break; + } + if (c.type === 'curried_spec') { + for (let j = 0; j < c.childCount; j++) { + if (c.child(j)?.type === 'arguments_spec') { + hasFunctionType = true; + break; + } } + if (hasFunctionType) break; } } diff --git a/tests/parsers/fsharp-signature.test.ts b/tests/parsers/fsharp-signature.test.ts index f6cff46c5..b60dec4d0 100644 --- a/tests/parsers/fsharp-signature.test.ts +++ b/tests/parsers/fsharp-signature.test.ts @@ -42,13 +42,14 @@ describe('F# signature (.fsi) parser', () => { }); it('extracts val declarations nested inside a module signature', () => { - // The WASM tree-sitter-fsharp 0.1.0 signature grammar does NOT produce a - // `module_defn` for `module Foo = ...` — it emits ERROR nodes and the - // `val` declarations float to the top level (so they're indexed as - // `add`, not `Foo.add`). The cargo 0.3.0 grammar parses it correctly - // and the Rust extractor qualifies as `Foo.add`. Grammar version skew - // is tracked under #1161; once npm bumps to 0.3.0+ this test should - // assert `Foo.add` to match the native engine. + // The WASM tree-sitter-fsharp signature grammar (currently v0.3.0) does + // NOT yet produce a `module_defn` for `module Foo = ...` — it emits + // ERROR nodes and the `val` declarations float to the top level (so + // they're indexed as `add`, not `Foo.add`). The cargo 0.3.0 grammar + // parses it correctly and the Rust extractor qualifies as `Foo.add`. + // The WASM grammar fix is tracked under #1161; once the signature + // grammar emits `module_defn` for nested modules, this assertion + // should be updated to expect `Foo.add` to match the native engine. 
const { symbols } = parseFSi(`module Foo =\n val add : int -> int\n`); expect(symbols.definitions).toContainEqual( expect.objectContaining({ name: 'add', kind: 'function' }), From efb8ed3f214a3377d65f492862661d6577d5fac0 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 19 May 2026 21:04:15 -0600 Subject: [PATCH 5/5] test(fsharp): expect Foo.add qualified name after npm grammar bump (#1162) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WASM tree-sitter-fsharp signature grammar was upgraded from v0.1.0 to v0.3.0 in adcaf40. v0.3.0 emits `module_defn` for nested `module Foo = ...` blocks (v0.1.0 emitted ERROR nodes), so the existing qualification logic now fires for the WASM engine too — `val` symbols get the parent module prefix in both engines. The signature test still expected the pre-bump behaviour (bare `add`), which made it fail in CI where the grammar bump landed. Update the assertion to lock in engine parity: - assert the qualified `Foo.add` function and the outer `Foo` module - assert the unqualified `add` is NOT emitted, so any future regression where the walker drops the enclosing module is caught Also refresh the `module_defn` comment in src/extractors/fsharp.ts — it still claimed the WASM grammar emitted ERROR nodes for this construct, which became stale after the v0.3.0 bump. --- src/extractors/fsharp.ts | 9 ++++----- tests/parsers/fsharp-signature.test.ts | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/extractors/fsharp.ts b/src/extractors/fsharp.ts index 7c3b924c9..45569fe97 100644 --- a/src/extractors/fsharp.ts +++ b/src/extractors/fsharp.ts @@ -51,11 +51,10 @@ function walkFSharpNode( break; case 'module_defn': // Nested signature module (`module Foo = ...`) in `.fsi` files, - // produced by the cargo 0.3.0 grammar. 
The WASM 0.1.0 signature - // grammar currently emits ERROR nodes for this construct, so it - // never reaches this branch there (tracked in #1161). When it - // does fire, accumulate the dotted module path so nested `val` - // declarations are qualified as `Outer.Inner.foo`. + // emitted by both the WASM (npm ionide tarball v0.3.0) and cargo + // v0.3.0 tree-sitter-fsharp signature grammars. Accumulate the + // dotted module path so nested `val` declarations are qualified + // as `Outer.Inner.foo` in parity with the native engine. nextModule = handleModuleDefn(node, ctx, currentModule); break; case 'function_declaration_left': diff --git a/tests/parsers/fsharp-signature.test.ts b/tests/parsers/fsharp-signature.test.ts index b60dec4d0..4fa7cfd6e 100644 --- a/tests/parsers/fsharp-signature.test.ts +++ b/tests/parsers/fsharp-signature.test.ts @@ -42,16 +42,21 @@ describe('F# signature (.fsi) parser', () => { }); it('extracts val declarations nested inside a module signature', () => { - // The WASM tree-sitter-fsharp signature grammar (currently v0.3.0) does - // NOT yet produce a `module_defn` for `module Foo = ...` — it emits - // ERROR nodes and the `val` declarations float to the top level (so - // they're indexed as `add`, not `Foo.add`). The cargo 0.3.0 grammar - // parses it correctly and the Rust extractor qualifies as `Foo.add`. - // The WASM grammar fix is tracked under #1161; once the signature - // grammar emits `module_defn` for nested modules, this assertion - // should be updated to expect `Foo.add` to match the native engine. + // Both the WASM (npm ionide tarball v0.3.0) and the cargo v0.3.0 + // tree-sitter-fsharp signature grammars emit `module_defn` for + // `module Foo = ...`, so `val` declarations nested inside are + // qualified with the module path (`Foo.add`) in both engines. The + // outer `module Foo` is also indexed as a `module` definition. 
const { symbols } = parseFSi(`module Foo =\n val add : int -> int\n`); expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Foo', kind: 'module' }), + ); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Foo.add', kind: 'function' }), + ); + // The unqualified name must NOT appear — that would mean the walker + // failed to thread the enclosing module through to `handleValueDefinition`. + expect(symbols.definitions).not.toContainEqual( expect.objectContaining({ name: 'add', kind: 'function' }), ); });