diff --git a/crates/codegraph-core/src/extractors/fsharp.rs b/crates/codegraph-core/src/extractors/fsharp.rs index 90008417..752ffb05 100644 --- a/crates/codegraph-core/src/extractors/fsharp.rs +++ b/crates/codegraph-core/src/extractors/fsharp.rs @@ -19,20 +19,51 @@ impl SymbolExtractor for FSharpExtractor { fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { "named_module" => handle_named_module(node, source, symbols), + "module_defn" => handle_module_defn(node, source, symbols), "function_declaration_left" => handle_function_decl(node, source, symbols), "type_definition" => handle_type_def(node, source, symbols), "import_decl" => handle_import_decl(node, source, symbols), "application_expression" => handle_application(node, source, symbols), "dot_expression" => handle_dot_expression(node, source, symbols), + "value_definition" => handle_value_definition(node, source, symbols), _ => {} } } -/// Find the enclosing `named_module` and return its identifier text. +/// Find the enclosing module name, walking up through any number of +/// `module_defn` (nested signature modules) wrappers before reaching the +/// top-level `named_module`. Returns the dotted path, e.g. `Outer.Inner`. +/// +/// Source files use `named_module` for the top-level `module M` header, and +/// the signature grammar wraps nested modules (`module Foo = …`) in +/// `module_defn` nodes. The WASM 0.1.0 signature grammar emitted ERROR nodes +/// for that construct (#1161); the 0.3.0 grammar now used by both the native +/// and WASM engines emits `module_defn`, so both qualify nested `val`s alike.
fn enclosing_module_name(node: &Node, source: &[u8]) -> Option { - let module = find_parent_of_type(node, "named_module")?; - let id = find_child(&module, "long_identifier")?; - Some(node_text(&id, source).to_string()) + let mut parts: Vec = Vec::new(); + let mut current = node.parent(); + while let Some(p) = current { + match p.kind() { + "module_defn" => { + if let Some(id) = find_child(&p, "identifier") { + parts.push(node_text(&id, source).to_string()); + } + } + "named_module" => { + if let Some(id) = find_child(&p, "long_identifier") { + parts.push(node_text(&id, source).to_string()); + } + break; + } + _ => {} + } + current = p.parent(); + } + if parts.is_empty() { + return None; + } + parts.reverse(); + Some(parts.join(".")) } fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) { @@ -52,6 +83,36 @@ fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }); } +/// Handle nested signature modules (`module Foo = ...`) emitted by the +/// cargo 0.3.0 grammar as `module_defn`. Emits a `module` definition with +/// the dotted parent path (e.g. `Outer.Foo`) and lets the DFS walker +/// continue into child `val` declarations, which pick up the same path via +/// `enclosing_module_name`. +fn handle_module_defn(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match find_child(node, "identifier") { + Some(n) => n, + None => return, + }; + let raw = node_text(&name_node, source).to_string(); + // `enclosing_module_name` walks `node.parent()` upward, so calling it on + // the `module_defn` itself yields the dotted prefix of its enclosing + // module(s) without including this module's own name. 
+ let qualified = match enclosing_module_name(node, source) { + Some(prefix) if !prefix.is_empty() => format!("{}.{}", prefix, raw), + _ => raw, + }; + symbols.definitions.push(Definition { + name: qualified, + kind: "module".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { // function_declaration_left: first child is the function name identifier, // followed by argument_patterns. @@ -300,3 +361,171 @@ fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) }); } } + +/// Handle `val name : type` declarations in `.fsi` signature files. +/// +/// The signature grammar reuses the `value_definition` node kind for `val` +/// declarations, distinguished from the source grammar's `let` bindings by +/// the first child being the literal `val` keyword. Source-file +/// `value_definition` nodes (which start with `let`) are intentionally +/// ignored here to preserve `.fs` extractor parity. 
+fn handle_value_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let first = match node.child(0) { + Some(c) => c, + None => return, + }; + if first.kind() != "val" { + return; + } + + let decl_left = match find_child(node, "value_declaration_left") { + Some(n) => n, + None => return, + }; + let name = match extract_value_name(&decl_left, source) { + Some(n) => n, + None => return, + }; + + let kind = if has_function_type(node) { "function" } else { "variable" }; + let module_name = enclosing_module_name(node, source); + let qualified = match module_name { + Some(m) => format!("{}.{}", m, name), + None => name, + }; + + symbols.definitions.push(Definition { + name: qualified, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn extract_value_name(decl_left: &Node, source: &[u8]) -> Option { + let pattern = find_child(decl_left, "identifier_pattern")?; + let ident = find_child(&pattern, "long_identifier_or_op") + .and_then(|n| find_child(&n, "identifier")) + .or_else(|| find_child(&pattern, "identifier"))?; + Some(node_text(&ident, source).to_string()) +} + +fn has_function_type(node: &Node) -> bool { + // The grammar wraps every type signature in `curried_spec`. A function type + // (e.g. `val add : int -> int -> int`) contains one or more `arguments_spec` + // children; a plain value (e.g. `val pi : float`) wraps a single `simple_type`. 
+ let Some(curried) = find_child(node, "curried_spec") else { return false }; + for i in 0..curried.child_count() { + if let Some(child) = curried.child(i) { + if child.kind() == "arguments_spec" { + return true; + } + } + } + false +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::extractors::SymbolExtractor; + use tree_sitter::Parser; + + fn parse_source(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_fsharp::LANGUAGE_FSHARP.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + FSharpExtractor.extract(&tree, code.as_bytes(), "test.fs") + } + + fn parse_signature(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_fsharp::LANGUAGE_SIGNATURE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + FSharpExtractor.extract(&tree, code.as_bytes(), "test.fsi") + } + + #[test] + fn signature_extracts_val_declarations() { + let s = parse_signature("namespace MyApp.Domain\n\nval add : int -> int -> int\nval pi : float\n"); + let add = s + .definitions + .iter() + .find(|d| d.name == "add") + .expect("val add should be extracted"); + assert_eq!(add.kind, "function"); + let pi = s + .definitions + .iter() + .find(|d| d.name == "pi") + .expect("val pi should be extracted"); + assert_eq!(pi.kind, "variable"); + } + + #[test] + fn signature_extracts_bare_val_declarations() { + let s = parse_signature("val negate : int -> int\nval count : int\n"); + assert!(s + .definitions + .iter() + .any(|d| d.name == "negate" && d.kind == "function")); + assert!(s + .definitions + .iter() + .any(|d| d.name == "count" && d.kind == "variable")); + } + + #[test] + fn source_grammar_does_not_extract_let_bindings_as_val() { + // `let x = 5` is a value_definition in the source grammar but its + // first child is `let`, not `val`. 
Our handler must not extract it + // (preserves prior `.fs` extraction parity — only function_declaration_left + // produces definitions in source files). + let s = parse_source("module M\n\nlet x = 5\n"); + assert!( + s.definitions.iter().all(|d| d.name != "x"), + "let bindings in .fs files must not be extracted as val definitions" + ); + } + + #[test] + fn signature_qualifies_val_inside_nested_module_defn() { + // The cargo 0.3.0 signature grammar wraps `module Foo = ...` as a + // `module_defn` node (the WASM 0.1.0 grammar emits ERROR for this + // construct — tracked under #1161). The `val` declarations inside + // must be qualified with the module path. + let s = parse_signature("namespace X\n\nmodule Foo =\n val add : int -> int\n"); + assert!( + s.definitions.iter().any(|d| d.name == "Foo.add" && d.kind == "function"), + "val add nested under `module Foo =` must be indexed as `Foo.add`, got: {:?}", + s.definitions.iter().map(|d| &d.name).collect::>(), + ); + assert!( + s.definitions.iter().any(|d| d.name == "Foo" && d.kind == "module"), + "module Foo must be indexed as a module definition" + ); + } + + #[test] + fn source_grammar_does_not_extract_val_mutable_class_fields() { + // `val mutable count: int = 0` inside a class is parsed as a `member_defn` + // node in the source grammar — NOT a `value_definition` — so our + // `value_definition`/`val`-first-child handler does not see it. + // This regression guard makes that empirical fact explicit. 
+ let s = parse_source( + "module M\n\ntype C() =\n val mutable count: int = 0\n", + ); + assert!( + s.definitions.iter().all(|d| d.name != "count"), + "val mutable class fields must not be extracted by the signature value_definition handler" + ); + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index d826bb8c..35ddf11a 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -140,7 +140,7 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } - LanguageKind::FSharp => { + LanguageKind::FSharp | LanguageKind::FSharpSignature => { fsharp::FSharpExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } LanguageKind::ObjC => { diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index cfd61fd1..5a933179 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -28,6 +28,7 @@ pub enum LanguageKind { Ocaml, OcamlInterface, FSharp, + FSharpSignature, ObjC, Gleam, Julia, @@ -70,6 +71,7 @@ impl LanguageKind { Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", Self::FSharp => "fsharp", + Self::FSharpSignature => "fsharp-signature", Self::ObjC => "objc", Self::Gleam => "gleam", Self::Julia => "julia", @@ -120,7 +122,8 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), - "fs" | "fsx" | "fsi" => Some(Self::FSharp), + "fs" | "fsx" => Some(Self::FSharp), + "fsi" => Some(Self::FSharpSignature), "m" => Some(Self::ObjC), "gleam" => Some(Self::Gleam), "jl" => Some(Self::Julia), @@ -165,6 +168,7 @@ impl LanguageKind { "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), "fsharp" => Some(Self::FSharp), + "fsharp-signature" => 
Some(Self::FSharpSignature), "objc" => Some(Self::ObjC), "gleam" => Some(Self::Gleam), "julia" => Some(Self::Julia), @@ -207,6 +211,7 @@ impl LanguageKind { Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), Self::FSharp => tree_sitter_fsharp::LANGUAGE_FSHARP.into(), + Self::FSharpSignature => tree_sitter_fsharp::LANGUAGE_SIGNATURE.into(), Self::ObjC => tree_sitter_objc::LANGUAGE.into(), Self::Gleam => tree_sitter_gleam::LANGUAGE.into(), Self::Julia => tree_sitter_julia::LANGUAGE.into(), @@ -232,8 +237,8 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, FSharp, ObjC, Gleam, Julia, Cuda, Clojure, Erlang, Groovy, R, Solidity, - Verilog, + OcamlInterface, FSharp, FSharpSignature, ObjC, Gleam, Julia, Cuda, Clojure, Erlang, + Groovy, R, Solidity, Verilog, ] } } @@ -304,6 +309,7 @@ mod tests { | LanguageKind::Ocaml | LanguageKind::OcamlInterface | LanguageKind::FSharp + | LanguageKind::FSharpSignature | LanguageKind::ObjC | LanguageKind::Gleam | LanguageKind::Julia @@ -320,7 +326,7 @@ mod tests { // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. 
- const EXPECTED_LEN: usize = 35; + const EXPECTED_LEN: usize = 36; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/package-lock.json b/package-lock.json index 09281b28..f2055a4f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -39,7 +39,7 @@ "tree-sitter-dart": "^1.0.0", "tree-sitter-elixir": "^0.3.5", "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", - "tree-sitter-fsharp": "^0.1.0", + "tree-sitter-fsharp": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz", "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", "tree-sitter-groovy": "^0.1.2", @@ -7464,18 +7464,18 @@ } }, "node_modules/tree-sitter-fsharp": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/tree-sitter-fsharp/-/tree-sitter-fsharp-0.1.0.tgz", - "integrity": "sha512-TCK+Jkg3qvEe4o6JxqUlE+vUc9lWhHhD7Toglu5Y04/PKa9DgACzqU5Jp9BYZnyhgGLEe+30kVOyFTY/iC/n1Q==", + "version": "0.3.0", + "resolved": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz", + "integrity": "sha512-fXN3uk3m9PaOK5MrylouFYvLsu5LbY64oyB0iTlk17fQ290XfX/arrhfGYIhiscaC6fVsoxv2MS0pOJPFzDTxw==", "dev": true, "hasInstallScript": true, "license": "MIT", "dependencies": { - "node-addon-api": "^8.1.0", - "node-gyp-build": "^4.8.2" + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" }, "peerDependencies": { - "tree-sitter": "^0.21.0" + "tree-sitter": "^0.22.4" }, "peerDependenciesMeta": { "tree_sitter": { diff --git a/package.json b/package.json index 1df8cb37..8f95159f 100644 --- a/package.json +++ b/package.json @@ -162,7 +162,7 @@ "tree-sitter-dart": "^1.0.0", "tree-sitter-elixir": "^0.3.5", "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", - "tree-sitter-fsharp": "^0.1.0", + "tree-sitter-fsharp": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz", "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", 
"tree-sitter-groovy": "^0.1.2", diff --git a/scripts/build-wasm.ts b/scripts/build-wasm.ts index c7d42557..733e4c14 100644 --- a/scripts/build-wasm.ts +++ b/scripts/build-wasm.ts @@ -206,6 +206,7 @@ const grammars = [ { name: 'tree-sitter-ocaml', pkg: 'tree-sitter-ocaml', sub: 'grammars/ocaml' }, { name: 'tree-sitter-ocaml_interface', pkg: 'tree-sitter-ocaml', sub: 'grammars/interface' }, { name: 'tree-sitter-fsharp', pkg: 'tree-sitter-fsharp', sub: 'fsharp' }, + { name: 'tree-sitter-fsharp_signature', pkg: 'tree-sitter-fsharp', sub: 'fsharp_signature' }, { name: 'tree-sitter-gleam', pkg: 'tree-sitter-gleam', sub: null }, { name: 'tree-sitter-clojure', pkg: 'tree-sitter-clojure', sub: null }, { name: 'tree-sitter-julia', pkg: 'tree-sitter-julia', sub: null }, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index ed4f8ca3..8c8b8151 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -233,6 +233,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], ['fsharp', FSHARP_AST_TYPES], + ['fsharp-signature', FSHARP_AST_TYPES], ['objc', OBJC_AST_TYPES], ['gleam', GLEAM_AST_TYPES], ['julia', JULIA_AST_TYPES], @@ -313,6 +314,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], ['fsharp', FSHARP_STRING_CONFIG], + ['fsharp-signature', FSHARP_STRING_CONFIG], ['objc', OBJC_STRING_CONFIG], ['gleam', GLEAM_STRING_CONFIG], ['julia', JULIA_STRING_CONFIG], diff --git a/src/domain/parser.ts b/src/domain/parser.ts index a94bbe49..b4aaa366 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -831,11 +831,18 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ }, { id: 'fsharp', - extensions: ['.fs', '.fsx', '.fsi'], + extensions: ['.fs', '.fsx'], grammarFile: 'tree-sitter-fsharp.wasm', extractor: extractFSharpSymbols, required: false, }, + { + id: 'fsharp-signature', + 
extensions: ['.fsi'], + grammarFile: 'tree-sitter-fsharp_signature.wasm', + extractor: extractFSharpSymbols, + required: false, + }, { id: 'gleam', extensions: ['.gleam'], diff --git a/src/domain/wasm-worker-entry.ts b/src/domain/wasm-worker-entry.ts index e8359a21..ca02bca7 100644 --- a/src/domain/wasm-worker-entry.ts +++ b/src/domain/wasm-worker-entry.ts @@ -306,11 +306,18 @@ const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ }, { id: 'fsharp', - extensions: ['.fs', '.fsx', '.fsi'], + extensions: ['.fs', '.fsx'], grammarFile: 'tree-sitter-fsharp.wasm', extractor: extractFSharpSymbols, required: false, }, + { + id: 'fsharp-signature', + extensions: ['.fsi'], + grammarFile: 'tree-sitter-fsharp_signature.wasm', + extractor: extractFSharpSymbols, + required: false, + }, { id: 'gleam', extensions: ['.gleam'], diff --git a/src/extractors/fsharp.ts b/src/extractors/fsharp.ts index ef54dd34..45569fe9 100644 --- a/src/extractors/fsharp.ts +++ b/src/extractors/fsharp.ts @@ -10,6 +10,13 @@ import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from F# files. * + * Grammar source: `tree-sitter-fsharp` v0.3.0 installed via a pinned GitHub + * tarball in `package.json` because the ionide/tree-sitter-fsharp project has + * no v0.3.0 release published to the npm registry. The cargo crate the native + * engine uses is also v0.3.0; both engines must stay aligned. Upgrading + * requires a manual edit of the tarball URL in `package.json` and + * `package-lock.json` — `npm update` will not bump this entry. 
+ * * tree-sitter-fsharp grammar notes: * - named_module: top-level module declaration * - function_declaration_left: LHS of `let name params = ...` @@ -42,6 +49,14 @@ function walkFSharpNode( case 'named_module': nextModule = handleNamedModule(node, ctx); break; + case 'module_defn': + // Nested signature module (`module Foo = ...`) in `.fsi` files, + // emitted by both the WASM (npm ionide tarball v0.3.0) and cargo + // v0.3.0 tree-sitter-fsharp signature grammars. Accumulate the + // dotted module path so nested `val` declarations are qualified + // as `Outer.Inner.foo` in parity with the native engine. + nextModule = handleModuleDefn(node, ctx, currentModule); + break; case 'function_declaration_left': handleFunctionDecl(node, ctx, currentModule); break; @@ -57,6 +72,9 @@ function walkFSharpNode( case 'dot_expression': handleDotExpression(node, ctx); break; + case 'value_definition': + handleValueDefinition(node, ctx, currentModule); + break; } for (let i = 0; i < node.childCount; i++) { @@ -79,6 +97,27 @@ function handleNamedModule(node: TreeSitterNode, ctx: ExtractorOutput): string | return nameNode.text; } +function handleModuleDefn( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): string | null { + // `module_defn` (cargo 0.3.0 signature grammar) wraps `module Foo = ...` + // sections inside an outer `namespace` or another module. The name is a + // direct `identifier` child. + const nameNode = findChild(node, 'identifier'); + if (!nameNode) return currentModule; + + const qualified = currentModule ? 
`${currentModule}.${nameNode.text}` : nameNode.text; + ctx.definitions.push({ + name: qualified, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + return qualified; +} + function handleFunctionDecl( node: TreeSitterNode, ctx: ExtractorOutput, @@ -251,3 +290,68 @@ function handleDotExpression(node: TreeSitterNode, ctx: ExtractorOutput): void { ctx.calls.push(call); } } + +// Handle `val name : type` declarations in `.fsi` signature files. +// The signature grammar reuses `value_definition` for `val` bindings, +// distinguished from the source grammar's `let` bindings by the first +// child being the literal `val` keyword. Source-file `value_definition` +// nodes (which start with `let`) are intentionally ignored to preserve +// `.fs` extractor parity. +function handleValueDefinition( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + const first = node.child(0); + if (!first || first.type !== 'val') return; + + const declLeft = findChild(node, 'value_declaration_left'); + if (!declLeft) return; + + const pattern = findChild(declLeft, 'identifier_pattern'); + if (!pattern) return; + + const ident = + findChild(findChild(pattern, 'long_identifier_or_op') ?? pattern, 'identifier') ?? + findChild(pattern, 'identifier'); + if (!ident) return; + + // The npm and cargo tree-sitter-fsharp 0.3.0 grammars — though sharing a + // version tag — emit type signatures with different node shapes: + // • WASM (npm 0.3.0 ionide tarball): `function_type` is the explicit + // function-type kind, present as a direct child of `value_definition` + // for `a -> b` types; plain values (e.g. `val pi : float`) appear as + // `simple_type`. + // • Native (cargo 0.3.0): every type signature is wrapped in + // `curried_spec`. A function type contains one or more `arguments_spec` + // children; a plain value wraps a single `simple_type`. 
+ // Classify as a function whenever `function_type` appears OR a + // `curried_spec` contains an `arguments_spec` child, so both engines stay + // in parity until the grammars converge. + let hasFunctionType = false; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (!c) continue; + if (c.type === 'function_type') { + hasFunctionType = true; + break; + } + if (c.type === 'curried_spec') { + for (let j = 0; j < c.childCount; j++) { + if (c.child(j)?.type === 'arguments_spec') { + hasFunctionType = true; + break; + } + } + if (hasFunctionType) break; + } + } + + const name = currentModule ? `${currentModule}.${ident.text}` : ident.text; + ctx.definitions.push({ + name, + kind: hasFunctionType ? 'function' : 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} diff --git a/src/types.ts b/src/types.ts index 8e4bf13c..542b81bc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -99,6 +99,7 @@ export type LanguageId = | 'ocaml' | 'ocaml-interface' | 'fsharp' + | 'fsharp-signature' | 'gleam' | 'clojure' | 'julia' diff --git a/tests/parsers/fsharp-signature.test.ts b/tests/parsers/fsharp-signature.test.ts new file mode 100644 index 00000000..4fa7cfd6 --- /dev/null +++ b/tests/parsers/fsharp-signature.test.ts @@ -0,0 +1,73 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractFSharpSymbols } from '../../src/domain/parser.js'; + +describe('F# signature (.fsi) parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseFSi(code: string) { + const parser = parsers.get('fsharp-signature'); + if (!parser) throw new Error('F# signature parser not available'); + const tree = parser.parse(code); + return { tree, symbols: extractFSharpSymbols(tree, 'test.fsi') }; + } + + it('parses bare val declarations without ERROR nodes', () => { + // The main F# grammar produces ERROR nodes for `val` declarations + // (#1114); the 
signature grammar parses them as `value_definition`. + const { tree, symbols } = parseFSi( + `namespace MyApp.Domain\n\nval add : int -> int -> int\nval pi : float\n`, + ); + expect(tree.rootNode.hasError).toBe(false); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'add', kind: 'function' }), + ); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'pi', kind: 'variable' }), + ); + }); + + it('extracts bare top-level val declarations', () => { + const { tree, symbols } = parseFSi(`val negate : int -> int\nval count : int\n`); + expect(tree.rootNode.hasError).toBe(false); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'negate', kind: 'function' }), + ); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'count', kind: 'variable' }), + ); + }); + + it('extracts val declarations nested inside a module signature', () => { + // Both the WASM (npm ionide tarball v0.3.0) and the cargo v0.3.0 + // tree-sitter-fsharp signature grammars emit `module_defn` for + // `module Foo = ...`, so `val` declarations nested inside are + // qualified with the module path (`Foo.add`) in both engines. The + // outer `module Foo` is also indexed as a `module` definition. + const { symbols } = parseFSi(`module Foo =\n val add : int -> int\n`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Foo', kind: 'module' }), + ); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Foo.add', kind: 'function' }), + ); + // The unqualified name must NOT appear — that would mean the walker + // failed to thread the enclosing module through to `handleValueDefinition`. 
+ expect(symbols.definitions).not.toContainEqual( + expect.objectContaining({ name: 'add', kind: 'function' }), + ); + }); + + it('does not crash when the grammar produces ERROR nodes for unsupported constructs', () => { + // `open` at the namespace top level is not handled by the upstream + // signature grammar v0.3.0 — it produces ERROR nodes but val + // declarations still recover via the parser's error recovery. + const { symbols } = parseFSi(`namespace X\n\nopen System\n\nval read : string -> string\n`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'read', kind: 'function' }), + ); + }); +}); diff --git a/tests/parsers/fsharp.test.ts b/tests/parsers/fsharp.test.ts index 4957617b..59c2f547 100644 --- a/tests/parsers/fsharp.test.ts +++ b/tests/parsers/fsharp.test.ts @@ -49,4 +49,17 @@ open System.IO`); const symbols = parseFSharp(`let result = List.map (fun x -> x + 1) [1; 2; 3]`); expect(symbols.calls.length).toBeGreaterThanOrEqual(1); }); + + it('does not extract `val mutable` class fields as definitions', () => { + // `val mutable count: int = 0` inside a class is parsed as `member_defn` + // in the source grammar — NOT `value_definition` — so the signature + // `val`-style handler must not produce a definition for it. + const symbols = parseFSharp(`module M + +type C() = + val mutable count: int = 0 +`); + expect(symbols.definitions.find((d) => d.name === 'count')).toBeUndefined(); + expect(symbols.definitions.find((d) => d.name === 'M.count')).toBeUndefined(); + }); });