optave · carlos-alm · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/crates/codegraph-core/src/extractors/julia.rs b/crates/codegraph-core/src/extractors/julia.rs
@@ -77,6 +77,14 @@ fn handle_module_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) -> O
 /// `function_definition` / `macro_definition` in a `signature` node whose
 /// first child is the `call_expression` — `find_child` only inspects direct
 /// children, so we unwrap one level explicitly.
+///
+/// Grammar assumption: every `function_definition` / `macro_definition` emits
+/// a `signature` child in the current tree-sitter-julia grammar. The fallback
+/// to `find_child(node, "call_expression")` exists only as a defensive measure
+/// for grammar drift — if it ever fires on a real definition, that fallback
+/// would silently match the first body call_expression and mis-record the
+/// function name. Callers must therefore treat a missing `signature` as a
+/// parser/grammar mismatch worth investigating, not as a routine code path.
 fn signature_call<'a>(node: &Node<'a>) -> Option<Node<'a>> {
     if let Some(sig) = find_child(node, "signature") {
         return find_child(&sig, "call_expression");
@@ -297,9 +305,17 @@ fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
 ///
 /// Handles plain identifiers, `Name <: Super` binary expressions, and
 /// parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
-/// into common wrapper kinds (binary expressions, parametrized identifiers,
-/// type-parameter lists). Returns `None` when no identifier can be located —
+/// into wrapper kinds the Julia grammar actually emits for type heads
+/// (binary expressions, parametrized type expressions, parameterized
+/// identifiers). Returns `None` when no identifier can be located —
 /// callers should skip emitting a definition in that case.
+///
+/// Note: `type_parameter_list` / `type_argument_list` are intentionally
+/// excluded — Julia's grammar uses `curly_expression` for `{T}` constructs,
+/// not those node kinds. Including them would risk recursing into a
+/// type-parameter list and returning a type variable (e.g. `T`) instead of
+/// the struct name if `find_base_name` were ever called on a node lacking a
+/// direct `identifier` child.
 fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> {
     // The node itself may already be the identifier (e.g. when called on a
     // direct side of a binary_expression like `Point <: AbstractPoint`).
@@ -317,9 +333,7 @@ fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> {
         match child.kind() {
             "binary_expression"
             | "parametrized_type_expression"
-            | "parameterized_identifier"
-            | "type_parameter_list"
-            | "type_argument_list" => {
+            | "parameterized_identifier" => {
                 if let Some(found) = find_base_name(&child) {
                     return Some(found);
                 }

diff --git a/src/extractors/julia.ts b/src/extractors/julia.ts
@@ -83,17 +83,49 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
   return nameNode.text;
 }
 
+function qualifyName(base: string, currentModule: string | null): string {
+  // For qualified names (`function Base.show ... end` inside `module Foo`,
+  // or short-form `Foo.bar(x, y) = x + y` inside `module Outer`), the LHS
+  // is a `scoped_identifier` already containing the qualifier — skip the
+  // module prefix to avoid producing `Foo.Base.show` / `Outer.Foo.bar`.
+  if (currentModule && !base.includes('.')) return `${currentModule}.${base}`;
+  return base;
+}
+
+/**
+ * Extract the call_expression from a function/macro definition's signature.
+ *
+ * tree-sitter-julia wraps the signature in a `signature` node whose direct
+ * children include the `call_expression` for the function name and parameters.
+ * `findChild` only inspects direct children, so we unwrap one level explicitly.
+ * Without this step, `findChild(node, 'call_expression')` on a
+ * `function_definition` would match the *body's* first call_expression
+ * (e.g. `println(...)` inside the body) instead of the signature.
+ *
+ * Grammar assumption: every `function_definition` / `macro_definition` emits a
+ * `signature` child in the current tree-sitter-julia grammar. The fallback to
+ * `findChild(node, 'call_expression')` exists only as a defensive measure for
+ * grammar drift — if it ever fires on a real definition, that fallback would
+ * silently match the first body call_expression and mis-record the function
+ * name. Callers must therefore treat a missing `signature` as a parser/grammar
+ * mismatch worth investigating, not as a routine code path.
+ */
+function signatureCall(node: TreeSitterNode): TreeSitterNode | null {
+  const sig = findChild(node, 'signature');
+  if (sig) return findChild(sig, 'call_expression');
+  return findChild(node, 'call_expression');
+}
+
 function handleFunctionDef(
   node: TreeSitterNode,
   ctx: ExtractorOutput,
   currentModule: string | null,
 ): void {
-  // function_definition may have a call_expression child as the signature
-  const callSig = findChild(node, 'call_expression');
+  const callSig = signatureCall(node);
   if (callSig) {
     const funcNameNode = callSig.child(0);
     if (funcNameNode) {
-      const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
+      const name = qualifyName(funcNameNode.text, currentModule);
       const params = extractJuliaParams(callSig);
       ctx.definitions.push({
         name,
@@ -110,9 +142,8 @@ function handleFunctionDef(
   const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
   if (!nameNode) return;
 
-  const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text;
   ctx.definitions.push({
-    name,
+    name: qualifyName(nameNode.text, currentModule),
     kind: 'function',
     line: node.startPosition.row + 1,
     endLine: nodeEndLine(node),
@@ -133,11 +164,10 @@ function handleAssignment(
     const funcNameNode = lhs.child(0);
     if (!funcNameNode) return;
 
-    const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
     const params = extractJuliaParams(lhs);
 
     ctx.definitions.push({
-      name,
+      name: qualifyName(funcNameNode.text, currentModule),
       kind: 'function',
       line: node.startPosition.row + 1,
       endLine: nodeEndLine(node),
@@ -146,16 +176,74 @@ function handleAssignment(
   }
 }
 
+/**
+ * Locate the base-name identifier within a `type_head` node.
+ *
+ * Handles plain identifiers, `Name <: Super` binary expressions, and
+ * parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
+ * into wrapper kinds the Julia grammar actually emits for type heads
+ * (binary expressions, parametrized type expressions, parameterized
+ * identifiers). Returns `null` when no identifier can be located — callers
+ * should skip emitting a definition in that case.
+ *
+ * Note: `type_parameter_list` / `type_argument_list` are intentionally
+ * excluded — Julia's grammar uses `curly_expression` for `{T}` constructs,
+ * not those node kinds. Including them would risk recursing into a
+ * type-parameter list and returning a type variable (e.g. `T`) instead of
+ * the struct name if `findBaseName` were ever called on a node lacking a
+ * direct `identifier` child.
+ */
+const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([
+  'binary_expression',
+  'parametrized_type_expression',
+  'parameterized_identifier',
+]);
+
+function findBaseName(node: TreeSitterNode): TreeSitterNode | null {
+  if (node.type === 'identifier') return node;
+  const direct = findChild(node, 'identifier');
+  if (direct) return direct;
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (!child) continue;
+    if (TYPE_HEAD_WRAPPERS.has(child.type)) {
+      const found = findBaseName(child);
+      if (found) return found;
+    }
+  }
+  return null;
+}
+
 function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
   // struct_definition: struct type_head fields... end
+  // type_head wraps the name and optional supertype. The name may be a
+  // bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either
+  // of those nested inside a `binary_expression` (`Name <: Super`).
   const typeHead = findChild(node, 'type_head');
-  const nameNode = typeHead
-    ? (findChild(typeHead, 'identifier') ?? typeHead)
-    : findChild(node, 'identifier');
+  if (!typeHead) return;
+
+  let nameNode: TreeSitterNode | null;
+  let supertypeNode: TreeSitterNode | null = null;
+
+  const binary = findChild(typeHead, 'binary_expression');
+  if (binary) {
+    // Walk into each side of the binary expression to find the base-name
+    // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`.
+    const sides: TreeSitterNode[] = [];
+    for (let i = 0; i < binary.childCount; i++) {
+      const c = binary.child(i);
+      if (c && c.type !== 'operator') sides.push(c);
+    }
+    nameNode = sides[0] ? findBaseName(sides[0]) : null;
+    supertypeNode = sides[1] ? findBaseName(sides[1]) : null;
+  } else {
+    nameNode = findBaseName(typeHead);
+  }
+
   if (!nameNode) return;
+  const structName = nameNode.text;
 
   const children: SubDeclaration[] = [];
-  // Fields are typed_expression children of struct_definition
   for (let i = 0; i < node.childCount; i++) {
     const child = node.child(i);
     if (!child) continue;
@@ -168,33 +256,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
           line: child.startPosition.row + 1,
         });
       }
-    }
-    // Plain identifier fields (no type annotation)
-    if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) {
+    } else if (child.type === 'identifier') {
+      // Plain identifier fields (no type annotation) appear as direct
+      // identifier children of struct_definition. The type_head is a
+      // separate node so there is nothing to filter out here.
       children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 });
     }
   }
 
-  // Check for supertype in type_head (Point <: AbstractPoint)
-  if (typeHead) {
-    const subtypeExpr = findChild(typeHead, 'subtype_expression');
-    if (subtypeExpr) {
-      // Find the supertype identifier
-      for (let i = 0; i < subtypeExpr.childCount; i++) {
-        const child = subtypeExpr.child(i);
-        if (child?.type === 'identifier' && i > 0) {
-          ctx.classes.push({
-            name: nameNode.text,
-            extends: child.text,
-            line: node.startPosition.row + 1,
-          });
-        }
-      }
-    }
+  if (supertypeNode) {
+    ctx.classes.push({
+      name: structName,
+      extends: supertypeNode.text,
+      line: node.startPosition.row + 1,
+    });
   }
 
   ctx.definitions.push({
-    name: nameNode.text,
+    name: structName,
     kind: 'struct',
     line: node.startPosition.row + 1,
     endLine: nodeEndLine(node),
@@ -203,7 +282,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
 }
 
 function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
-  const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
+  // abstract_definition: `abstract type` type_head `end`
+  // The identifier is nested inside `type_head` — possibly wrapped in a
+  // `Name <: Super` binary_expression or a `Name{T,...}` parameterized form.
+  // Mirror handleStructDef and skip rather than emit a garbled name when no
+  // base identifier can be located.
+  const typeHead = findChild(node, 'type_head');
+  if (!typeHead) return;
+  const nameNode = findBaseName(typeHead);
   if (!nameNode) return;
 
   ctx.definitions.push({
@@ -219,10 +305,17 @@ function handleMacroDef(
   ctx: ExtractorOutput,
   currentModule: string | null,
 ): void {
-  const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
+  // macro_definition: `macro` signature/call_expression body `end`.
+  // The name lives in the same shape as a function signature — unwrap via
+  // signatureCall so we don't pick up an identifier from the body (e.g.
+  // `macro mymac(x) x end` would otherwise resolve to `@x`).
+  const callSig = signatureCall(node);
+  const nameNode =
+    callSig?.child(0) ?? node.childForFieldName('name') ?? findChild(node, 'identifier');
   if (!nameNode) return;
 
-  const name = currentModule ? `${currentModule}.@${nameNode.text}` : `@${nameNode.text}`;
+  const base = nameNode.text;
+  const name = currentModule ? `${currentModule}.@${base}` : `@${base}`;
   ctx.definitions.push({
     name,
     kind: 'function',
@@ -232,19 +325,40 @@ function handleMacroDef(
 }
 
 function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
+  // tree-sitter-julia shapes:
+  //   `using LinearAlgebra`     → using_statement [ using, identifier ]
+  //   `import Foo.Bar`          → import_statement [ import, scoped_identifier ]
+  //   `import Base: show`       → import_statement [ import, selected_import[Base, show] ]
+  //   `import Foo.Bar: baz`     → import_statement [ import, selected_import[scoped_identifier, baz] ]
   const names: string[] = [];
   let source = '';
 
   for (let i = 0; i < node.childCount; i++) {
     const child = node.child(i);
     if (!child) continue;
-    if (
-      child.type === 'identifier' ||
-      child.type === 'scoped_identifier' ||
-      child.type === 'selected_import'
-    ) {
-      if (!source) source = child.text;
-      names.push(child.text.split('.').pop() || child.text);
+    if (child.type === 'identifier' || child.type === 'scoped_identifier') {
+      const txt = child.text;
+      if (!source) source = txt;
+      names.push(txt.split('.').pop() || txt);
+    } else if (child.type === 'selected_import') {
+      // First identifier-bearing node is the source module; the rest are
+      // imported names. The module may itself be a `scoped_identifier`
+      // (e.g. `import Foo.Bar: baz`) — handle it alongside bare
+      // `identifier` and use the trailing segment as the display name,
+      // mirroring the outer loop.
+      let first = true;
+      for (let j = 0; j < child.childCount; j++) {
+        const part = child.child(j);
+        if (!part) continue;
+        if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue;
+        const txt = part.text;
+        if (first) {
+          if (!source) source = txt;
+          first = false;
+        } else {
+          names.push(txt.split('.').pop() || txt);
+        }
+      }
     }
   }
 
@@ -260,8 +374,15 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
 function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
   // Don't record if parent is assignment LHS (that's a function definition)
   if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return;
-  // Don't record if parent is function_definition (that's a signature)
-  if (node.parent?.type === 'function_definition') return;
+  // Skip when this call is the signature of a function/macro definition.
+  // tree-sitter-julia wraps the signature in a `signature` node whose parent
+  // is `function_definition` or `macro_definition`. Body calls (e.g.
+  // `println(name)` inside `function greet ... end`) appear as descendants of
+  // the body, not as direct children of `signature`, so they are unaffected.
+  if (node.parent?.type === 'signature') {
+    const grand = node.parent.parent;
+    if (grand?.type === 'function_definition' || grand?.type === 'macro_definition') return;
+  }
 
   const funcNode = node.child(0);
   if (!funcNode) return;