Skip to content

Commit 7cbb37d

Browse files
authored
fix: Fix VB.NET indexing by implementing fallback chunking system (#6552)
1 parent ebfd384 commit 7cbb37d

File tree

4 files changed

+300
-1
lines changed

4 files changed

+300
-1
lines changed
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
import { describe, it, expect, beforeEach, vi } from "vitest"
2+
import { CodeParser } from "../parser"
3+
import * as path from "path"
4+
5+
// Mock TelemetryService
6+
vi.mock("../../../../../packages/telemetry/src/TelemetryService", () => ({
7+
TelemetryService: {
8+
instance: {
9+
captureEvent: vi.fn(),
10+
},
11+
},
12+
}))
13+
14+
import { shouldUseFallbackChunking } from "../../shared/supported-extensions"
15+
16+
/**
 * Verifies that CodeParser.parseFile produces "fallback_chunk" blocks for
 * extensions listed for fallback chunking (.vb, .scala), and returns no blocks
 * for empty or very small inputs.
 */
describe("CodeParser - VB.NET and Fallback Extensions Support", () => {
	let parser: CodeParser

	// Each test gets its own CodeParser instance
	beforeEach(() => {
		parser = new CodeParser()
	})

	it("should use fallback chunking for VB.NET files", async () => {
		// Precondition: .vb must be registered for fallback chunking
		expect(shouldUseFallbackChunking(".vb")).toBe(true)

		const vbContent = `
Imports System
Imports System.Collections.Generic
Imports System.Linq

Namespace MyApplication
Public Class Calculator
Private _history As New List(Of String)()

Public Function Add(a As Integer, b As Integer) As Integer
Dim result As Integer = a + b
_history.Add($"{a} + {b} = {result}")
Return result
End Function

Public Function Subtract(a As Integer, b As Integer) As Integer
Dim result As Integer = a - b
_history.Add($"{a} - {b} = {result}")
Return result
End Function

Public Function Multiply(a As Integer, b As Integer) As Integer
Dim result As Integer = a * b
_history.Add($"{a} * {b} = {result}")
Return result
End Function

Public Function Divide(a As Integer, b As Integer) As Double
If b = 0 Then
Throw New DivideByZeroException("Cannot divide by zero")
End If
Dim result As Double = CDbl(a) / CDbl(b)
_history.Add($"{a} / {b} = {result}")
Return result
End Function

Public Function GetHistory() As List(Of String)
Return New List(Of String)(_history)
End Function

Public Sub ClearHistory()
_history.Clear()
End Sub
End Class

Public Module Program
Sub Main(args As String())
Dim calc As New Calculator()

Console.WriteLine("Calculator Demo")
Console.WriteLine("===============")

Console.WriteLine($"10 + 5 = {calc.Add(10, 5)}")
Console.WriteLine($"10 - 5 = {calc.Subtract(10, 5)}")
Console.WriteLine($"10 * 5 = {calc.Multiply(10, 5)}")
Console.WriteLine($"10 / 5 = {calc.Divide(10, 5)}")

Console.WriteLine()
Console.WriteLine("History:")
For Each entry In calc.GetHistory()
Console.WriteLine($" {entry}")
Next
End Sub
End Module
End Namespace
`.trim()

		const result = await parser.parseFile("test.vb", {
			content: vbContent,
			fileHash: "test-hash",
		})

		// Should have results from fallback chunking
		expect(result.length).toBeGreaterThan(0)

		// Every block must be of type 'fallback_chunk'
		for (const block of result) {
			expect(block.type).toBe("fallback_chunk")
		}

		// Joining the chunk contents with newlines must reproduce the input exactly
		const totalContent = result.map((block) => block.content).join("\n")
		expect(totalContent).toBe(vbContent)

		// Verify file path is correct
		expect(result[0].file_path).toBe("test.vb")
	})

	it("should handle large VB.NET files with proper chunking", async () => {
		// Generate many classes to create a large file
		const classBlocks = Array.from(
			{ length: 50 },
			(_, i) => `
Public Class TestClass${i}
Private _id As Integer = ${i}
Private _name As String = "Class ${i}"
Private _data As New Dictionary(Of String, Object)()

Public Property Id As Integer
Get
Return _id
End Get
Set(value As Integer)
_id = value
End Set
End Property

Public Property Name As String
Get
Return _name
End Get
Set(value As String)
_name = value
End Set
End Property

Public Sub ProcessData()
For i As Integer = 0 To 100
_data.Add($"key_{i}", $"value_{i}")
Next
End Sub

Public Function GetData() As Dictionary(Of String, Object)
Return New Dictionary(Of String, Object)(_data)
End Function
End Class
`,
		).join("\n")

		const largeVbContent =
			`
Imports System
Imports System.Collections.Generic

Namespace LargeApplication
` +
			classBlocks +
			`
End Namespace
`

		const result = await parser.parseFile("large-test.vb", {
			content: largeVbContent,
			fileHash: "large-test-hash",
		})

		// Should have multiple chunks due to size
		expect(result.length).toBeGreaterThan(1)

		// NOTE(review): 150000 is a hard-coded stand-in for
		// MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR. Confirm it matches the values
		// exported from ../../constants (the module parser.ts imports them from); if the
		// real product is smaller, this bound is too loose to catch an oversized chunk.
		const maxChunkChars = 150000

		for (const block of result) {
			// All chunks should be fallback chunks
			expect(block.type).toBe("fallback_chunk")
			// Chunks must not exceed the maximum size
			expect(block.content.length).toBeLessThanOrEqual(maxChunkChars)
		}
	})

	it("should handle empty VB.NET files", async () => {
		// NOTE(review): confirm parseFile treats an empty `content` as provided content
		// rather than as "absent"; otherwise this case would exercise a different path
		// (e.g. reading "empty.vb" from disk) and pass for the wrong reason.
		const result = await parser.parseFile("empty.vb", {
			content: "",
			fileHash: "empty-hash",
		})

		// Should return empty array for empty content
		expect(result).toEqual([])
	})

	it("should handle small VB.NET files below minimum chunk size", async () => {
		const smallContent = "Imports System"

		const result = await parser.parseFile("small.vb", {
			content: smallContent,
			fileHash: "small-hash",
		})

		// Should return empty array for content below MIN_BLOCK_CHARS
		expect(result).toEqual([])
	})

	it("should use fallback chunking for other configured fallback extensions", async () => {
		// Test with Scala which is in our fallback list
		const content = `object ScalaExample {
def main(args: Array[String]): Unit = {
println("This is a Scala file that should use fallback chunking")
val numbers = List(1, 2, 3, 4, 5)
val doubled = numbers.map(_ * 2)
println(s"Doubled numbers: $doubled")
}

def factorial(n: Int): Int = {
if (n <= 1) 1
else n * factorial(n - 1)
}
}`

		const result = await parser.parseFile("test.scala", {
			content,
			fileHash: "test-hash-scala",
		})

		// Should have results from fallback chunking
		expect(result.length).toBeGreaterThan(0)

		// Every block must be of type 'fallback_chunk'
		for (const block of result) {
			expect(block.type).toBe("fallback_chunk")
		}
	})
})
240+
241+
/**
 * Unit tests for shouldUseFallbackChunking: which extensions are routed to
 * fallback chunking, and that matching ignores letter case.
 */
describe("Fallback Extensions Configuration", () => {
	it("should correctly identify extensions that need fallback chunking", () => {
		// Extensions that should use fallback
		expect(shouldUseFallbackChunking(".vb")).toBe(true)
		expect(shouldUseFallbackChunking(".scala")).toBe(true)

		// Extensions that should not use fallback (have working parsers)
		expect(shouldUseFallbackChunking(".js")).toBe(false)
		expect(shouldUseFallbackChunking(".ts")).toBe(false)
		expect(shouldUseFallbackChunking(".py")).toBe(false)
		expect(shouldUseFallbackChunking(".java")).toBe(false)
		expect(shouldUseFallbackChunking(".cs")).toBe(false)
		expect(shouldUseFallbackChunking(".go")).toBe(false)
		expect(shouldUseFallbackChunking(".rs")).toBe(false)
	})

	it("should be case-insensitive", () => {
		expect(shouldUseFallbackChunking(".VB")).toBe(true)
		expect(shouldUseFallbackChunking(".Vb")).toBe(true)
		expect(shouldUseFallbackChunking(".SCALA")).toBe(true)
		expect(shouldUseFallbackChunking(".Scala")).toBe(true)

		// Case folding must not turn non-fallback extensions into matches;
		// without these, an implementation returning true for any upper-case
		// input would still pass this test.
		expect(shouldUseFallbackChunking(".JS")).toBe(false)
		expect(shouldUseFallbackChunking(".Ts")).toBe(false)
		expect(shouldUseFallbackChunking(".PY")).toBe(false)
	})
})

src/services/code-index/processors/parser.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { Node } from "web-tree-sitter"
55
import { LanguageParser, loadRequiredLanguageParsers } from "../../tree-sitter/languageParser"
66
import { parseMarkdown } from "../../tree-sitter/markdownParser"
77
import { ICodeParser, CodeBlock } from "../interfaces"
8-
import { scannerExtensions } from "../shared/supported-extensions"
8+
import { scannerExtensions, shouldUseFallbackChunking } from "../shared/supported-extensions"
99
import { MAX_BLOCK_CHARS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../constants"
1010
import { TelemetryService } from "@roo-code/telemetry"
1111
import { TelemetryEventName } from "@roo-code/types"
@@ -101,6 +101,11 @@ export class CodeParser implements ICodeParser {
101101
return this.parseMarkdownContent(filePath, content, fileHash, seenSegmentHashes)
102102
}
103103

104+
// Check if this extension should use fallback chunking
105+
if (shouldUseFallbackChunking(`.${ext}`)) {
106+
return this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes)
107+
}
108+
104109
// Check if we already have the parser loaded
105110
if (!this.loadedParsers[ext]) {
106111
const pendingLoad = this.pendingLoads.get(ext)

src/services/code-index/shared/supported-extensions.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,32 @@ import { extensions as allExtensions } from "../../tree-sitter"
22

33
// Include all extensions including markdown for the scanner
44
export const scannerExtensions = allExtensions
5+
6+
/**
 * Extensions that should always use fallback chunking instead of tree-sitter parsing.
 * These are typically languages that don't have a proper WASM parser available
 * or where the parser doesn't work correctly.
 *
 * Entries are lower-case and include the leading dot; shouldUseFallbackChunking
 * normalizes its argument to that form before looking it up.
 *
 * NOTE: Only extensions that are already in the supported extensions list can be added here.
 * To add support for new file types, they must first be added to the tree-sitter extensions list.
 *
 * HOW TO ADD A NEW FALLBACK EXTENSION:
 * 1. First ensure the extension is in src/services/tree-sitter/index.ts extensions array
 * 2. Add the extension to the fallbackExtensions array below
 * 3. The file will automatically use length-based chunking for indexing
 *
 * Note: Do NOT remove parser cases from languageParser.ts as they may be used elsewhere
 */
export const fallbackExtensions = [
	".vb", // Visual Basic .NET - no dedicated WASM parser
	".scala", // Scala - uses fallback chunking instead of Lua query workaround
]

/**
 * Check if a file extension should use fallback chunking.
 *
 * Matching is case-insensitive. The leading dot is optional: "vb" and ".vb"
 * are treated the same, so a caller holding a bare extension cannot silently
 * miss the fallback list by forgetting to prepend the dot.
 *
 * @param extension File extension, with or without the leading dot
 * @returns true if the extension should use fallback chunking
 */
export function shouldUseFallbackChunking(extension: string): boolean {
	const lowered = extension.toLowerCase()
	// Normalize to the dotted form used by the entries in fallbackExtensions
	const dotted = lowered.startsWith(".") ? lowered : `.${lowered}`
	return fallbackExtensions.includes(dotted)
}

src/services/tree-sitter/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ const extensions = [
8989
// Embedded Template
9090
"ejs",
9191
"erb",
92+
// Visual Basic .NET
93+
"vb",
9294
].map((e) => `.${e}`)
9395

9496
export { extensions }

0 commit comments

Comments
 (0)