-
Notifications
You must be signed in to change notification settings - Fork 10
fix(julia): port parameterized-type / qualified-def / qualified-import fixes to WASM #1128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
796352b
8c2e148
92ce81b
f90e811
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,17 +83,49 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n | |
| return nameNode.text; | ||
| } | ||
|
|
||
| function qualifyName(base: string, currentModule: string | null): string { | ||
| // For qualified names (`function Base.show ... end` inside `module Foo`, | ||
| // or short-form `Foo.bar(x, y) = x + y` inside `module Outer`), the LHS | ||
| // is a `scoped_identifier` already containing the qualifier — skip the | ||
| // module prefix to avoid producing `Foo.Base.show` / `Outer.Foo.bar`. | ||
| if (currentModule && !base.includes('.')) return `${currentModule}.${base}`; | ||
| return base; | ||
| } | ||
|
|
||
| /** | ||
| * Extract the call_expression from a function/macro definition's signature. | ||
| * | ||
| * tree-sitter-julia wraps the signature in a `signature` node whose direct | ||
| * children include the `call_expression` for the function name and parameters. | ||
| * `findChild` only inspects direct children, so we unwrap one level explicitly. | ||
| * Without this step, `findChild(node, 'call_expression')` on a | ||
| * `function_definition` would match the *body's* first call_expression | ||
| * (e.g. `println(...)` inside the body) instead of the signature. | ||
| * | ||
| * Grammar assumption: every `function_definition` / `macro_definition` emits a | ||
| * `signature` child in the current tree-sitter-julia grammar. The fallback to | ||
| * `findChild(node, 'call_expression')` exists only as a defensive measure for | ||
| * grammar drift — if it ever fires on a real definition, that fallback would | ||
| * silently match the first body call_expression and mis-record the function | ||
| * name. Callers must therefore treat a missing `signature` as a parser/grammar | ||
| * mismatch worth investigating, not as a routine code path. | ||
| */ | ||
| function signatureCall(node: TreeSitterNode): TreeSitterNode | null { | ||
| const sig = findChild(node, 'signature'); | ||
| if (sig) return findChild(sig, 'call_expression'); | ||
| return findChild(node, 'call_expression'); | ||
| } | ||
|
|
||
| function handleFunctionDef( | ||
| node: TreeSitterNode, | ||
| ctx: ExtractorOutput, | ||
| currentModule: string | null, | ||
| ): void { | ||
| // function_definition may have a call_expression child as the signature | ||
| const callSig = findChild(node, 'call_expression'); | ||
| const callSig = signatureCall(node); | ||
| if (callSig) { | ||
| const funcNameNode = callSig.child(0); | ||
| if (funcNameNode) { | ||
| const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text; | ||
| const name = qualifyName(funcNameNode.text, currentModule); | ||
| const params = extractJuliaParams(callSig); | ||
| ctx.definitions.push({ | ||
| name, | ||
|
|
@@ -110,9 +142,8 @@ function handleFunctionDef( | |
| const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); | ||
| if (!nameNode) return; | ||
|
|
||
| const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text; | ||
| ctx.definitions.push({ | ||
| name, | ||
| name: qualifyName(nameNode.text, currentModule), | ||
| kind: 'function', | ||
| line: node.startPosition.row + 1, | ||
| endLine: nodeEndLine(node), | ||
|
|
@@ -133,11 +164,10 @@ function handleAssignment( | |
| const funcNameNode = lhs.child(0); | ||
| if (!funcNameNode) return; | ||
|
|
||
| const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text; | ||
| const params = extractJuliaParams(lhs); | ||
|
|
||
| ctx.definitions.push({ | ||
| name, | ||
| name: qualifyName(funcNameNode.text, currentModule), | ||
| kind: 'function', | ||
| line: node.startPosition.row + 1, | ||
| endLine: nodeEndLine(node), | ||
|
|
@@ -146,16 +176,74 @@ function handleAssignment( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Locate the base-name identifier within a `type_head` node. | ||
| * | ||
| * Handles plain identifiers, `Name <: Super` binary expressions, and | ||
| * parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing | ||
| * into wrapper kinds the Julia grammar actually emits for type heads | ||
| * (binary expressions, parametrized type expressions, parameterized | ||
| * identifiers). Returns `null` when no identifier can be located — callers | ||
| * should skip emitting a definition in that case. | ||
| * | ||
| * Note: `type_parameter_list` / `type_argument_list` are intentionally | ||
| * excluded — Julia's grammar uses `curly_expression` for `{T}` constructs, | ||
| * not those node kinds. Including them would risk recursing into a | ||
| * type-parameter list and returning a type variable (e.g. `T`) instead of | ||
| * the struct name if `findBaseName` were ever called on a node lacking a | ||
| * direct `identifier` child. | ||
| */ | ||
| const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([ | ||
| 'binary_expression', | ||
| 'parametrized_type_expression', | ||
| 'parameterized_identifier', | ||
| ]); | ||
|
|
||
| function findBaseName(node: TreeSitterNode): TreeSitterNode | null { | ||
| if (node.type === 'identifier') return node; | ||
| const direct = findChild(node, 'identifier'); | ||
| if (direct) return direct; | ||
| for (let i = 0; i < node.childCount; i++) { | ||
| const child = node.child(i); | ||
| if (!child) continue; | ||
| if (TYPE_HEAD_WRAPPERS.has(child.type)) { | ||
| const found = findBaseName(child); | ||
| if (found) return found; | ||
| } | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
| function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { | ||
| // struct_definition: struct type_head fields... end | ||
| // type_head wraps the name and optional supertype. The name may be a | ||
| // bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either | ||
| // of those nested inside a `binary_expression` (`Name <: Super`). | ||
| const typeHead = findChild(node, 'type_head'); | ||
| const nameNode = typeHead | ||
| ? (findChild(typeHead, 'identifier') ?? typeHead) | ||
| : findChild(node, 'identifier'); | ||
| if (!typeHead) return; | ||
|
|
||
| let nameNode: TreeSitterNode | null; | ||
| let supertypeNode: TreeSitterNode | null = null; | ||
|
|
||
| const binary = findChild(typeHead, 'binary_expression'); | ||
| if (binary) { | ||
| // Walk into each side of the binary expression to find the base-name | ||
| // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`. | ||
| const sides: TreeSitterNode[] = []; | ||
| for (let i = 0; i < binary.childCount; i++) { | ||
| const c = binary.child(i); | ||
| if (c && c.type !== 'operator') sides.push(c); | ||
| } | ||
| nameNode = sides[0] ? findBaseName(sides[0]) : null; | ||
| supertypeNode = sides[1] ? findBaseName(sides[1]) : null; | ||
| } else { | ||
| nameNode = findBaseName(typeHead); | ||
| } | ||
|
Comment on lines
+228
to
+241
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The old code explicitly looked for a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 8c2e148 — added a test for non-parameterized struct inheritance ( |
||
|
|
||
| if (!nameNode) return; | ||
| const structName = nameNode.text; | ||
|
|
||
| const children: SubDeclaration[] = []; | ||
| // Fields are typed_expression children of struct_definition | ||
| for (let i = 0; i < node.childCount; i++) { | ||
| const child = node.child(i); | ||
| if (!child) continue; | ||
|
|
@@ -168,33 +256,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { | |
| line: child.startPosition.row + 1, | ||
| }); | ||
| } | ||
| } | ||
| // Plain identifier fields (no type annotation) | ||
| if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) { | ||
| } else if (child.type === 'identifier') { | ||
| // Plain identifier fields (no type annotation) appear as direct | ||
| // identifier children of struct_definition. The type_head is a | ||
| // separate node so there is nothing to filter out here. | ||
| children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 }); | ||
| } | ||
| } | ||
|
|
||
| // Check for supertype in type_head (Point <: AbstractPoint) | ||
| if (typeHead) { | ||
| const subtypeExpr = findChild(typeHead, 'subtype_expression'); | ||
| if (subtypeExpr) { | ||
| // Find the supertype identifier | ||
| for (let i = 0; i < subtypeExpr.childCount; i++) { | ||
| const child = subtypeExpr.child(i); | ||
| if (child?.type === 'identifier' && i > 0) { | ||
| ctx.classes.push({ | ||
| name: nameNode.text, | ||
| extends: child.text, | ||
| line: node.startPosition.row + 1, | ||
| }); | ||
| } | ||
| } | ||
| } | ||
| if (supertypeNode) { | ||
| ctx.classes.push({ | ||
| name: structName, | ||
| extends: supertypeNode.text, | ||
| line: node.startPosition.row + 1, | ||
| }); | ||
| } | ||
|
|
||
| ctx.definitions.push({ | ||
| name: nameNode.text, | ||
| name: structName, | ||
| kind: 'struct', | ||
| line: node.startPosition.row + 1, | ||
| endLine: nodeEndLine(node), | ||
|
|
@@ -203,7 +282,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { | |
| } | ||
|
|
||
| function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void { | ||
| const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); | ||
| // abstract_definition: `abstract type` type_head `end` | ||
| // The identifier is nested inside `type_head` — possibly wrapped in a | ||
| // `Name <: Super` binary_expression or a `Name{T,...}` parameterized form. | ||
| // Mirror handleStructDef and skip rather than emit a garbled name when no | ||
| // base identifier can be located. | ||
| const typeHead = findChild(node, 'type_head'); | ||
| if (!typeHead) return; | ||
| const nameNode = findBaseName(typeHead); | ||
| if (!nameNode) return; | ||
|
|
||
| ctx.definitions.push({ | ||
|
|
@@ -219,10 +305,17 @@ function handleMacroDef( | |
| ctx: ExtractorOutput, | ||
| currentModule: string | null, | ||
| ): void { | ||
| const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); | ||
| // macro_definition: `macro` signature/call_expression body `end`. | ||
| // The name lives in the same shape as a function signature — unwrap via | ||
| // signatureCall so we don't pick up an identifier from the body (e.g. | ||
| // `macro mymac(x) x end` would otherwise resolve to `@x`). | ||
| const callSig = signatureCall(node); | ||
| const nameNode = | ||
| callSig?.child(0) ?? node.childForFieldName('name') ?? findChild(node, 'identifier'); | ||
| if (!nameNode) return; | ||
|
|
||
| const name = currentModule ? `${currentModule}.@${nameNode.text}` : `@${nameNode.text}`; | ||
| const base = nameNode.text; | ||
| const name = currentModule ? `${currentModule}.@${base}` : `@${base}`; | ||
| ctx.definitions.push({ | ||
| name, | ||
| kind: 'function', | ||
|
|
@@ -232,19 +325,40 @@ function handleMacroDef( | |
| } | ||
|
|
||
| function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { | ||
| // tree-sitter-julia shapes: | ||
| // `using LinearAlgebra` → using_statement [ using, identifier ] | ||
| // `import Foo.Bar` → import_statement [ import, scoped_identifier ] | ||
| // `import Base: show` → import_statement [ import, selected_import[Base, show] ] | ||
| // `import Foo.Bar: baz` → import_statement [ import, selected_import[scoped_identifier, baz] ] | ||
| const names: string[] = []; | ||
| let source = ''; | ||
|
|
||
| for (let i = 0; i < node.childCount; i++) { | ||
| const child = node.child(i); | ||
| if (!child) continue; | ||
| if ( | ||
| child.type === 'identifier' || | ||
| child.type === 'scoped_identifier' || | ||
| child.type === 'selected_import' | ||
| ) { | ||
| if (!source) source = child.text; | ||
| names.push(child.text.split('.').pop() || child.text); | ||
| if (child.type === 'identifier' || child.type === 'scoped_identifier') { | ||
| const txt = child.text; | ||
| if (!source) source = txt; | ||
| names.push(txt.split('.').pop() || txt); | ||
| } else if (child.type === 'selected_import') { | ||
| // First identifier-bearing node is the source module; the rest are | ||
| // imported names. The module may itself be a `scoped_identifier` | ||
| // (e.g. `import Foo.Bar: baz`) — handle it alongside bare | ||
| // `identifier` and use the trailing segment as the display name, | ||
| // mirroring the outer loop. | ||
| let first = true; | ||
| for (let j = 0; j < child.childCount; j++) { | ||
| const part = child.child(j); | ||
| if (!part) continue; | ||
| if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue; | ||
| const txt = part.text; | ||
| if (first) { | ||
| if (!source) source = txt; | ||
| first = false; | ||
| } else { | ||
| names.push(txt.split('.').pop() || txt); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -260,8 +374,15 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { | |
| function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { | ||
| // Don't record if parent is assignment LHS (that's a function definition) | ||
| if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return; | ||
| // Don't record if parent is function_definition (that's a signature) | ||
| if (node.parent?.type === 'function_definition') return; | ||
| // Skip when this call is the signature of a function/macro definition. | ||
| // tree-sitter-julia wraps the signature in a `signature` node whose parent | ||
| // is `function_definition` or `macro_definition`. Body calls (e.g. | ||
| // `println(name)` inside `function greet ... end`) appear as descendants of | ||
| // the body, not as direct children of `signature`, so they are unaffected. | ||
| if (node.parent?.type === 'signature') { | ||
| const grand = node.parent.parent; | ||
| if (grand?.type === 'function_definition' || grand?.type === 'macro_definition') return; | ||
| } | ||
|
|
||
| const funcNode = node.child(0); | ||
| if (!funcNode) return; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
type_parameter_list/type_argument_listinTYPE_HEAD_WRAPPERScan yield the wrong identifierfindBaseNamechecksfindChild(node, 'identifier')before recursing, so in practice the struct name is found before the loop reaches atype_parameter_listortype_argument_list. However, iffindBaseNameis ever called with a node that lacks a directidentifierchild and does have one of those wrapper types as a child — for example, a future call site or an unusual parameterized form — the function will recurse intotype_parameter_listand return the first type-parameter identifier (e.g.T) instead of the struct name. Removing those two entries fromTYPE_HEAD_WRAPPERSwould eliminate the risk without affecting correctness.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 8c2e148 — removed
type_parameter_listandtype_argument_listfromTYPE_HEAD_WRAPPERSin both the WASM and native engines (preserving dual-engine parity per CLAUDE.md). AST inspection confirmed Julia's grammar usescurly_expressionfor{T}constructs, not those node kinds, so the entries were dead code. Removing them eliminates the risk of recursing into a type-parameter list and returning a type variable as the struct name, as you noted.