Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
import { describe, it, expect, beforeEach, vi } from "vitest"
import { CodeParser } from "../parser"
import * as path from "path"

// Mock the language parser loading so CodeParser never loads real
// tree-sitter grammars. vi.mock is hoisted above the imports by vitest,
// so this factory must be self-contained (no references to outer variables).
vi.mock("../../../tree-sitter/languageParser", () => ({
// mockResolvedValue resolves to the SAME object instance on every call,
// which is what lets each test later overwrite `cs.query.captures`
// (see the installCaptures pattern below) and have the parser under
// test observe the change.
loadRequiredLanguageParsers: vi.fn().mockResolvedValue({
cs: {
parser: {
// Minimal stub of a parsed tree: only the rootNode fields the parser
// reads. NOTE(review): endPosition row 27 looks like an arbitrary stub
// value — it does not need to match any individual test's content.
parse: vi.fn().mockReturnValue({
rootNode: {
type: "compilation_unit",
startPosition: { row: 0, column: 0 },
endPosition: { row: 27, column: 1 },
text: "",
children: [],
},
}),
},
// Each test installs its own mockReturnValue for `captures`.
query: {
captures: vi.fn(),
},
},
}),
}))

describe("CodeParser - C# Using Directives Fix", () => {
	let parser: CodeParser

	beforeEach(() => {
		parser = new CodeParser()
		vi.clearAllMocks()
	})

	/**
	 * Builds a mock tree-sitter capture for a one-line C# using directive
	 * that starts at column 0 of the given row.
	 */
	function usingCapture(text: string, row: number) {
		return {
			name: "name.definition.using",
			node: {
				type: "using_directive",
				text,
				startPosition: { row, column: 0 },
				endPosition: { row, column: text.length },
				children: [],
				childForFieldName: () => null,
			},
		}
	}

	/** Points the mocked C# query at the supplied captures. */
	async function installCaptures(filePath: string, captures: unknown[]) {
		const { loadRequiredLanguageParsers } = await import("../../../tree-sitter/languageParser")
		const mockParsers = await loadRequiredLanguageParsers([filePath])
		mockParsers.cs.query.captures = vi.fn().mockReturnValue(captures)
	}

	it("should group using directives together to meet minimum block size", async () => {
		const filePath = "/test/TestFile.cs"
		const namespaceText = `namespace TestNamespace
{
public class TestClass
{
public void TestMethod()
{
Console.WriteLine("Hello World");
}
}
}`
		// The file body is the four directives, a blank line, then the
		// namespace block — so the namespace node's text is a verbatim slice
		// of the content.
		const content = `using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

${namespaceText}`

		await installCaptures(filePath, [
			usingCapture("using System;", 0),
			usingCapture("using System.Collections.Generic;", 1),
			usingCapture("using System.Linq;", 2),
			usingCapture("using System.Threading.Tasks;", 3),
			{
				name: "name.definition.namespace",
				node: {
					type: "namespace_declaration",
					text: namespaceText,
					startPosition: { row: 5, column: 0 },
					endPosition: { row: 14, column: 1 },
					children: [],
					childForFieldName: () => null,
				},
			},
		])

		const result = await parser.parseFile(filePath, { content })

		// One merged using-directive block plus the namespace block.
		expect(result).toHaveLength(2)

		const usingBlock = result.find((block) => block.type === "using_directive_group")
		expect(usingBlock).toBeDefined()
		expect(usingBlock?.start_line).toBe(1)
		expect(usingBlock?.end_line).toBe(4)
		expect(usingBlock?.content).toBe(
			[
				"using System;",
				"using System.Collections.Generic;",
				"using System.Linq;",
				"using System.Threading.Tasks;",
			].join("\n"),
		)

		const namespaceBlock = result.find((block) => block.type === "namespace_declaration")
		expect(namespaceBlock).toBeDefined()
		expect(namespaceBlock?.start_line).toBe(6)
		expect(namespaceBlock?.end_line).toBe(15)
	})

	it("should not group using directives if they are separated by too many lines", async () => {
		const filePath = "/test/TestFile.cs"
		const content = `using System;
using System.Collections.Generic;
using System.Text;

// Some comment

using System.Linq;
using System.Threading.Tasks;

namespace TestNamespace
{
public class TestClass { }
}`

		await installCaptures(filePath, [
			usingCapture("using System;", 0),
			usingCapture("using System.Collections.Generic;", 1),
			usingCapture("using System.Text;", 2),
			usingCapture("using System.Linq;", 6),
			usingCapture("using System.Threading.Tasks;", 7),
		])

		const result = await parser.parseFile(filePath, { content })

		// At least the first run of directives must surface as a group.
		const usingBlocks = result.filter((block) => block.type === "using_directive_group")
		expect(usingBlocks.length).toBeGreaterThanOrEqual(1)

		// Rows 0-2 form the first run (source lines 1-3).
		const firstGroup = usingBlocks[0]
		expect(firstGroup.content).toBe(
			["using System;", "using System.Collections.Generic;", "using System.Text;"].join("\n"),
		)
		expect(firstGroup.start_line).toBe(1)
		expect(firstGroup.end_line).toBe(3)

		// The later run (rows 6-7) may or may not meet the minimum block size
		// on its own; when it does, it must be a separate group on lines 7-8.
		if (usingBlocks.length > 1) {
			const secondGroup = usingBlocks[1]
			expect(secondGroup.content).toBe(["using System.Linq;", "using System.Threading.Tasks;"].join("\n"))
			expect(secondGroup.start_line).toBe(7)
			expect(secondGroup.end_line).toBe(8)
		}
	})
})
115 changes: 113 additions & 2 deletions src/services/code-index/processors/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,42 @@ export class CodeParser implements ICodeParser {
const results: CodeBlock[] = []

// Process captures if not empty
const queue: Node[] = Array.from(captures).map((capture) => capture.node)
const captureNodes = Array.from(captures).map((capture) => capture.node)

// Group small related nodes together (e.g., using directives in C#)
const groupedNodes = this._groupSmallNodes(captureNodes)

const queue: Node[] = [...groupedNodes.regularNodes]

// Process grouped small nodes first
for (const group of groupedNodes.groups) {
const { nodes, type, identifier } = group
const combinedContent = nodes.map((n) => n.text).join("\n")
const start_line = nodes[0].startPosition.row + 1
const end_line = nodes[nodes.length - 1].endPosition.row + 1

// Only create a block if the combined content meets the minimum size
if (combinedContent.length >= MIN_BLOCK_CHARS) {
const contentPreview = combinedContent.slice(0, 100)
const segmentHash = createHash("sha256")
.update(`${filePath}-${start_line}-${end_line}-${combinedContent.length}-${contentPreview}`)
.digest("hex")

if (!seenSegmentHashes.has(segmentHash)) {
seenSegmentHashes.add(segmentHash)
results.push({
file_path: filePath,
identifier,
type,
start_line,
end_line,
content: combinedContent,
segmentHash,
fileHash,
})
}
}
}

while (queue.length > 0) {
const currentNode = queue.shift()!
Expand Down Expand Up @@ -218,12 +253,88 @@ export class CodeParser implements ICodeParser {
}
}
}
// Nodes smaller than minBlockChars are ignored
// Nodes smaller than minBlockChars are ignored unless they were grouped
}

return results
}

/**
 * Groups small nodes that are semantically related (e.g., consecutive C#
 * using directives) so their combined text can meet the minimum block size
 * requirement.
 *
 * A node whose own text already reaches MIN_BLOCK_CHARS goes straight into
 * `regularNodes`. Runs of same-typed small nodes separated by at most one
 * blank line (a row delta of at most 2) are collected into one group. A run
 * is emitted when it has more than one member, or when its type is
 * "using_directive" — lone using directives are still grouped so C# import
 * headers are always indexed. Any other lone small node is intentionally
 * dropped, mirroring the caller's minimum-size filter.
 *
 * @param nodes Capture nodes, assumed to be in document order.
 * @returns `groups` (type suffixed with "_group", no identifier) and the
 *          large `regularNodes` to be processed individually.
 */
private _groupSmallNodes(nodes: Node[]): {
	groups: Array<{ nodes: Node[]; type: string; identifier: string | null }>
	regularNodes: Node[]
} {
	const groups: Array<{ nodes: Node[]; type: string; identifier: string | null }> = []
	const regularNodes: Node[] = []

	// Single forward pass. An index cursor replaces the processed-index set a
	// previous revision used: every node is visited exactly once, so the
	// trailing "add remaining unprocessed nodes" sweep (which could never
	// fire) is gone.
	let i = 0
	while (i < nodes.length) {
		const node = nodes[i]

		// Large nodes stand on their own; no grouping needed.
		if (node.text.length >= MIN_BLOCK_CHARS) {
			regularNodes.push(node)
			i++
			continue
		}

		// Collect the run of small, same-typed, near-adjacent nodes starting
		// here. A different type or a large node ends the run; the large node
		// is re-examined on the next outer iteration.
		const nodeType = node.type
		const groupNodes: Node[] = [node]
		let j = i + 1
		while (j < nodes.length) {
			const nextNode = nodes[j]
			if (nextNode.type !== nodeType || nextNode.text.length >= MIN_BLOCK_CHARS) {
				break
			}
			// Allow up to one empty line between grouped nodes.
			const prevEndLine = groupNodes[groupNodes.length - 1].endPosition.row
			if (nextNode.startPosition.row - prevEndLine > 2) {
				break
			}
			groupNodes.push(nextNode)
			j++
		}
		i = j

		// Emit multi-node runs, plus lone using directives (special-cased so
		// they surface even when a file has a single import).
		if (groupNodes.length > 1 || nodeType === "using_directive") {
			groups.push({
				nodes: groupNodes,
				type: `${nodeType}_group`,
				identifier: null,
			})
		}
		// Any other lone small node is ignored, as the caller would ignore it.
	}

	return { groups, regularNodes }
}

/**
* Common helper function to chunk text by lines, avoiding tiny remainders.
*/
Expand Down