Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 233 additions & 4 deletions crates/codegraph-core/src/extractors/fsharp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,51 @@ impl SymbolExtractor for FSharpExtractor {
fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
match node.kind() {
"named_module" => handle_named_module(node, source, symbols),
"module_defn" => handle_module_defn(node, source, symbols),
"function_declaration_left" => handle_function_decl(node, source, symbols),
"type_definition" => handle_type_def(node, source, symbols),
"import_decl" => handle_import_decl(node, source, symbols),
"application_expression" => handle_application(node, source, symbols),
"dot_expression" => handle_dot_expression(node, source, symbols),
"value_definition" => handle_value_definition(node, source, symbols),
_ => {}
}
}

/// Build the dotted module path that encloses `node`, e.g. `Outer.Inner`.
///
/// Starting at `node.parent()`, every `module_defn` ancestor (a nested
/// signature module) contributes its `identifier`, and the walk stops at the
/// first `named_module` ancestor, whose `long_identifier` becomes the leading
/// segment. `node` itself is never inspected, so calling this on a
/// `module_defn` yields only the path of the modules around it.
///
/// Returns `None` when no ancestor contributes a name.
///
/// Source files use `named_module` for the top-level module declaration, and
/// the signature grammar from the tree-sitter-fsharp 0.3.0 crate wraps nested
/// signature modules in `module_defn` nodes. The WASM signature grammar
/// currently emits ERROR nodes for nested signature modules, so qualification
/// cannot be recovered there — tracked under #1161.
fn enclosing_module_name(node: &Node, source: &[u8]) -> Option<String> {
    // Segments are collected innermost-first and reversed before joining.
    let mut segments: Vec<String> = Vec::new();
    let mut ancestor = node.parent();
    while let Some(current) = ancestor {
        match current.kind() {
            "module_defn" => {
                if let Some(id) = find_child(&current, "identifier") {
                    segments.push(node_text(&id, source).to_string());
                }
            }
            "named_module" => {
                if let Some(id) = find_child(&current, "long_identifier") {
                    segments.push(node_text(&id, source).to_string());
                }
                // The top-level module ends the path; do not look further up.
                break;
            }
            _ => {}
        }
        ancestor = current.parent();
    }
    if segments.is_empty() {
        return None;
    }
    segments.reverse();
    Some(segments.join("."))
}

fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
Expand All @@ -52,6 +83,36 @@ fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
});
}

/// Record a nested signature module (`module Foo = ...`) as a `module`
/// definition.
///
/// The tree-sitter-fsharp 0.3.0 crate grammar emits these as `module_defn`
/// nodes. The definition name is the module's own identifier prefixed with
/// the dotted path of its enclosing modules (e.g. `Outer.Foo`), or the bare
/// identifier when there is no enclosing module. This handler does not
/// recurse; `val` declarations inside the module obtain the same path through
/// `enclosing_module_name` when they are handled.
fn handle_module_defn(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    let Some(ident) = find_child(node, "identifier") else {
        return;
    };
    let own_name = node_text(&ident, source).to_string();

    // The lookup begins at this node's parent, so the result covers only the
    // surrounding modules and never repeats `own_name`.
    let prefix = enclosing_module_name(node, source).filter(|p| !p.is_empty());
    let name = if let Some(prefix) = prefix {
        format!("{}.{}", prefix, own_name)
    } else {
        own_name
    };

    let definition = Definition {
        name,
        kind: "module".to_string(),
        line: start_line(node),
        end_line: Some(end_line(node)),
        decorators: None,
        complexity: None,
        cfg: None,
        children: None,
    };
    symbols.definitions.push(definition);
}

fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
// function_declaration_left: first child is the function name identifier,
// followed by argument_patterns.
Expand Down Expand Up @@ -300,3 +361,171 @@ fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols)
});
}
}

/// Extract a `val name : type` declaration from an `.fsi` signature file.
///
/// Both grammars use the `value_definition` node kind: the signature grammar
/// for `val` declarations and the source grammar for `let` bindings. Only
/// nodes whose first child is the `val` keyword are handled; everything else
/// (notably source-file `let` bindings) is skipped on purpose so `.fs`
/// extraction stays unchanged.
///
/// The definition is named `<module path>.<name>` when an enclosing module is
/// found and `<name>` otherwise. Its kind is `function` when the declared type
/// is a function type and `variable` if not.
fn handle_value_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    // Guard: the node must open with the literal `val` keyword.
    match node.child(0) {
        Some(first) if first.kind() == "val" => {}
        _ => return,
    }

    let Some(decl_left) = find_child(node, "value_declaration_left") else {
        return;
    };
    let Some(name) = extract_value_name(&decl_left, source) else {
        return;
    };

    let kind = match has_function_type(node) {
        true => "function",
        false => "variable",
    };
    let qualified = if let Some(module) = enclosing_module_name(node, source) {
        format!("{}.{}", module, name)
    } else {
        name
    };

    let definition = Definition {
        name: qualified,
        kind: kind.to_string(),
        line: start_line(node),
        end_line: Some(end_line(node)),
        decorators: None,
        complexity: None,
        cfg: None,
        children: None,
    };
    symbols.definitions.push(definition);
}

/// Return the declared name from a `value_declaration_left` node.
///
/// Looks inside the `identifier_pattern` child and prefers the `identifier`
/// nested under `long_identifier_or_op`; if that is not available it falls
/// back to an `identifier` that is a direct child of the pattern. Returns
/// `None` when the pattern or both identifier locations are missing.
fn extract_value_name(decl_left: &Node, source: &[u8]) -> Option<String> {
    let pattern = find_child(decl_left, "identifier_pattern")?;

    // First choice: identifier wrapped in `long_identifier_or_op`.
    let wrapped = match find_child(&pattern, "long_identifier_or_op") {
        Some(wrapper) => find_child(&wrapper, "identifier"),
        None => None,
    };
    // Second choice: identifier directly under the pattern.
    let ident = match wrapped {
        Some(found) => found,
        None => find_child(&pattern, "identifier")?,
    };

    Some(node_text(&ident, source).to_string())
}

/// Report whether a `val` declaration carries a function type.
///
/// Per the signature grammar, the type annotation sits in a `curried_spec`
/// child. A function type (e.g. `val add : int -> int -> int`) has at least
/// one `arguments_spec` directly under it, while a plain value
/// (e.g. `val pi : float`) has none. Returns `false` when there is no
/// `curried_spec` child at all.
fn has_function_type(node: &Node) -> bool {
    match find_child(node, "curried_spec") {
        Some(spec) => (0..spec.child_count())
            .filter_map(|idx| spec.child(idx))
            .any(|child| child.kind() == "arguments_spec"),
        None => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::extractors::SymbolExtractor;
    use tree_sitter::Parser;

    /// Parse `code` with `language` and run the F# extractor over the tree,
    /// reporting `path` as the file name.
    fn extract_with(language: tree_sitter::Language, code: &str, path: &str) -> FileSymbols {
        let mut parser = Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(code.as_bytes(), None).unwrap();
        FSharpExtractor.extract(&tree, code.as_bytes(), path)
    }

    /// Extract symbols from `.fs` source text using the source grammar.
    fn parse_source(code: &str) -> FileSymbols {
        extract_with(tree_sitter_fsharp::LANGUAGE_FSHARP.into(), code, "test.fs")
    }

    /// Extract symbols from `.fsi` signature text using the signature grammar.
    fn parse_signature(code: &str) -> FileSymbols {
        extract_with(tree_sitter_fsharp::LANGUAGE_SIGNATURE.into(), code, "test.fsi")
    }

    #[test]
    fn signature_extracts_val_declarations() {
        let symbols = parse_signature(
            "namespace MyApp.Domain\n\nval add : int -> int -> int\nval pi : float\n",
        );
        let expectations = [
            ("add", "function", "val add should be extracted"),
            ("pi", "variable", "val pi should be extracted"),
        ];
        for (name, kind, missing) in expectations {
            // Look the definition up by name first, then check its kind.
            let def = symbols
                .definitions
                .iter()
                .find(|d| d.name == name)
                .expect(missing);
            assert_eq!(def.kind, kind);
        }
    }

    #[test]
    fn signature_extracts_bare_val_declarations() {
        let symbols = parse_signature("val negate : int -> int\nval count : int\n");
        for (name, kind) in [("negate", "function"), ("count", "variable")] {
            assert!(symbols
                .definitions
                .iter()
                .any(|d| d.name == name && d.kind == kind));
        }
    }

    #[test]
    fn source_grammar_does_not_extract_let_bindings_as_val() {
        // In the source grammar `let x = 5` is a value_definition whose first
        // child is `let` rather than `val`, so the handler must leave it alone.
        // This keeps prior `.fs` behaviour, where only
        // function_declaration_left yields definitions.
        let symbols = parse_source("module M\n\nlet x = 5\n");
        let extracted_x = symbols.definitions.iter().any(|d| d.name == "x");
        assert!(
            !extracted_x,
            "let bindings in .fs files must not be extracted as val definitions"
        );
    }

    #[test]
    fn signature_qualifies_val_inside_nested_module_defn() {
        // The 0.3.0 crate signature grammar represents `module Foo = ...` as a
        // `module_defn` node (the WASM 0.1.0 grammar produces ERROR for it —
        // tracked under #1161). `val` declarations nested inside have to be
        // qualified with the module path.
        let symbols = parse_signature("namespace X\n\nmodule Foo =\n  val add : int -> int\n");
        let names = symbols.definitions.iter().map(|d| &d.name).collect::<Vec<_>>();

        let has_qualified_val = symbols
            .definitions
            .iter()
            .any(|d| d.name == "Foo.add" && d.kind == "function");
        assert!(
            has_qualified_val,
            "val add nested under `module Foo =` must be indexed as `Foo.add`, got: {:?}",
            names,
        );

        let has_module = symbols
            .definitions
            .iter()
            .any(|d| d.name == "Foo" && d.kind == "module");
        assert!(
            has_module,
            "module Foo must be indexed as a module definition"
        );
    }

    #[test]
    fn source_grammar_does_not_extract_val_mutable_class_fields() {
        // Inside a class, `val mutable count: int = 0` is parsed by the source
        // grammar as a `member_defn` node, not a `value_definition`, so the
        // `val`-first-child handler never sees it. This regression guard
        // records that observation.
        let symbols = parse_source("module M\n\ntype C() =\n    val mutable count: int = 0\n");
        let extracted_count = symbols.definitions.iter().any(|d| d.name == "count");
        assert!(
            !extracted_count,
            "val mutable class fields must not be extracted by the signature value_definition handler"
        );
    }
}
2 changes: 1 addition & 1 deletion crates/codegraph-core/src/extractors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ pub fn extract_symbols_with_opts(
LanguageKind::Ocaml | LanguageKind::OcamlInterface => {
ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
}
LanguageKind::FSharp => {
LanguageKind::FSharp | LanguageKind::FSharpSignature => {
fsharp::FSharpExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
}
LanguageKind::ObjC => {
Expand Down
14 changes: 10 additions & 4 deletions crates/codegraph-core/src/parser_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub enum LanguageKind {
Ocaml,
OcamlInterface,
FSharp,
FSharpSignature,
ObjC,
Gleam,
Julia,
Expand Down Expand Up @@ -70,6 +71,7 @@ impl LanguageKind {
Self::Ocaml => "ocaml",
Self::OcamlInterface => "ocaml-interface",
Self::FSharp => "fsharp",
Self::FSharpSignature => "fsharp-signature",
Self::ObjC => "objc",
Self::Gleam => "gleam",
Self::Julia => "julia",
Expand Down Expand Up @@ -120,7 +122,8 @@ impl LanguageKind {
"hs" => Some(Self::Haskell),
"ml" => Some(Self::Ocaml),
"mli" => Some(Self::OcamlInterface),
"fs" | "fsx" | "fsi" => Some(Self::FSharp),
"fs" | "fsx" => Some(Self::FSharp),
"fsi" => Some(Self::FSharpSignature),
"m" => Some(Self::ObjC),
"gleam" => Some(Self::Gleam),
"jl" => Some(Self::Julia),
Expand Down Expand Up @@ -165,6 +168,7 @@ impl LanguageKind {
"ocaml" => Some(Self::Ocaml),
"ocaml-interface" => Some(Self::OcamlInterface),
"fsharp" => Some(Self::FSharp),
"fsharp-signature" => Some(Self::FSharpSignature),
"objc" => Some(Self::ObjC),
"gleam" => Some(Self::Gleam),
"julia" => Some(Self::Julia),
Expand Down Expand Up @@ -207,6 +211,7 @@ impl LanguageKind {
Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(),
Self::FSharp => tree_sitter_fsharp::LANGUAGE_FSHARP.into(),
Self::FSharpSignature => tree_sitter_fsharp::LANGUAGE_SIGNATURE.into(),
Self::ObjC => tree_sitter_objc::LANGUAGE.into(),
Self::Gleam => tree_sitter_gleam::LANGUAGE.into(),
Self::Julia => tree_sitter_julia::LANGUAGE.into(),
Expand All @@ -232,8 +237,8 @@ impl LanguageKind {
&[
JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C,
Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml,
OcamlInterface, FSharp, ObjC, Gleam, Julia, Cuda, Clojure, Erlang, Groovy, R, Solidity,
Verilog,
OcamlInterface, FSharp, FSharpSignature, ObjC, Gleam, Julia, Cuda, Clojure, Erlang,
Groovy, R, Solidity, Verilog,
]
}
}
Expand Down Expand Up @@ -304,6 +309,7 @@ mod tests {
| LanguageKind::Ocaml
| LanguageKind::OcamlInterface
| LanguageKind::FSharp
| LanguageKind::FSharpSignature
| LanguageKind::ObjC
| LanguageKind::Gleam
| LanguageKind::Julia
Expand All @@ -320,7 +326,7 @@ mod tests {
// Because both checks require the same manual update, they reinforce
// each other: a developer who updates the match is reminded to also
// update `all()` and this count.
const EXPECTED_LEN: usize = 35;
const EXPECTED_LEN: usize = 36;
assert_eq!(
LanguageKind::all().len(),
EXPECTED_LEN,
Expand Down
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
"tree-sitter-dart": "^1.0.0",
"tree-sitter-elixir": "^0.3.5",
"tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*",
"tree-sitter-fsharp": "^0.1.0",
"tree-sitter-fsharp": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz",
"tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam",
"tree-sitter-go": "^0.25.0",
"tree-sitter-groovy": "^0.1.2",
Expand Down
1 change: 1 addition & 0 deletions scripts/build-wasm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ const grammars = [
{ name: 'tree-sitter-ocaml', pkg: 'tree-sitter-ocaml', sub: 'grammars/ocaml' },
{ name: 'tree-sitter-ocaml_interface', pkg: 'tree-sitter-ocaml', sub: 'grammars/interface' },
{ name: 'tree-sitter-fsharp', pkg: 'tree-sitter-fsharp', sub: 'fsharp' },
{ name: 'tree-sitter-fsharp_signature', pkg: 'tree-sitter-fsharp', sub: 'fsharp_signature' },
{ name: 'tree-sitter-gleam', pkg: 'tree-sitter-gleam', sub: null },
{ name: 'tree-sitter-clojure', pkg: 'tree-sitter-clojure', sub: null },
{ name: 'tree-sitter-julia', pkg: 'tree-sitter-julia', sub: null },
Expand Down
Loading
Loading