Skip to content

Commit b98ab9c

Browse files
committed
fix: Fix VB.NET indexing by implementing fallback chunking system
- Create fallback extensions system for languages without WASM parsers
- Add VB.NET (.vb) to fallback extensions list
- Remove broken VB.NET parser configuration that was using C# parser
- Add comprehensive tests for VB.NET indexing
- Make system extensible for future languages without parsers

Fixes #6420
1 parent 305a5da commit b98ab9c

File tree

3 files changed

+293
-0
lines changed

3 files changed

+293
-0
lines changed
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
import { describe, it, expect, beforeEach, vi } from "vitest"
import { CodeParser } from "../parser"
import { MAX_BLOCK_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../../constants"
import * as path from "path"
4+
5+
// Stub out TelemetryService so the code under test sees a singleton whose
// captureEvent is a vitest spy instead of the real implementation.
vi.mock("../../../../../packages/telemetry/src/TelemetryService", () => {
	const captureEvent = vi.fn()
	const instance = { captureEvent }

	return { TelemetryService: { instance } }
})
13+
14+
import { shouldUseFallbackChunking } from "../../shared/fallback-extensions"
15+
16+
/**
 * Exercises CodeParser.parseFile for extensions routed to fallback chunking
 * (.vb and .scala). Each case passes the content inline together with a file
 * hash and inspects the returned blocks.
 */
describe("CodeParser - VB.NET and Fallback Extensions Support", () => {
	let parser: CodeParser

	// Upper bound on a single chunk's length. Derived from the shared constants
	// (rather than a hard-coded number) so the size assertion stays meaningful
	// if the limits are ever tuned.
	const maxChunkChars = MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR

	beforeEach(() => {
		// Fresh parser per test so no state leaks between cases.
		parser = new CodeParser()
	})

	it("should use fallback chunking for VB.NET files", async () => {
		const vbContent = `
Imports System
Imports System.Collections.Generic
Imports System.Linq

Namespace MyApplication
Public Class Calculator
Private _history As New List(Of String)()

Public Function Add(a As Integer, b As Integer) As Integer
Dim result As Integer = a + b
_history.Add($"{a} + {b} = {result}")
Return result
End Function

Public Function Subtract(a As Integer, b As Integer) As Integer
Dim result As Integer = a - b
_history.Add($"{a} - {b} = {result}")
Return result
End Function

Public Function Multiply(a As Integer, b As Integer) As Integer
Dim result As Integer = a * b
_history.Add($"{a} * {b} = {result}")
Return result
End Function

Public Function Divide(a As Integer, b As Integer) As Double
If b = 0 Then
Throw New DivideByZeroException("Cannot divide by zero")
End If
Dim result As Double = CDbl(a) / CDbl(b)
_history.Add($"{a} / {b} = {result}")
Return result
End Function

Public Function GetHistory() As List(Of String)
Return New List(Of String)(_history)
End Function

Public Sub ClearHistory()
_history.Clear()
End Sub
End Class

Public Module Program
Sub Main(args As String())
Dim calc As New Calculator()

Console.WriteLine("Calculator Demo")
Console.WriteLine("===============")

Console.WriteLine($"10 + 5 = {calc.Add(10, 5)}")
Console.WriteLine($"10 - 5 = {calc.Subtract(10, 5)}")
Console.WriteLine($"10 * 5 = {calc.Multiply(10, 5)}")
Console.WriteLine($"10 / 5 = {calc.Divide(10, 5)}")

Console.WriteLine()
Console.WriteLine("History:")
For Each entry In calc.GetHistory()
Console.WriteLine($" {entry}")
Next
End Sub
End Module
End Namespace
`.trim()

		const result = await parser.parseFile("test.vb", {
			content: vbContent,
			fileHash: "test-hash",
		})

		// Should have results from fallback chunking
		expect(result.length).toBeGreaterThan(0)

		// Check that all blocks are of type 'fallback_chunk'
		result.forEach((block) => {
			expect(block.type).toBe("fallback_chunk")
		})

		// Verify content is properly chunked: re-joining the chunks with newlines
		// must reproduce the input exactly (nothing dropped, nothing duplicated).
		const totalContent = result.map((block) => block.content).join("\n")
		expect(totalContent).toBe(vbContent)

		// Verify file path is correct
		expect(result[0].file_path).toBe("test.vb")
	})

	it("should handle large VB.NET files with proper chunking", async () => {
		// Create a large VB.NET file content
		const largeVbContent =
			`
Imports System
Imports System.Collections.Generic

Namespace LargeApplication
` +
			// Generate many classes to create a large file
			Array.from(
				{ length: 50 },
				(_, i) => `
Public Class TestClass${i}
Private _id As Integer = ${i}
Private _name As String = "Class ${i}"
Private _data As New Dictionary(Of String, Object)()

Public Property Id As Integer
Get
Return _id
End Get
Set(value As Integer)
_id = value
End Set
End Property

Public Property Name As String
Get
Return _name
End Get
Set(value As String)
_name = value
End Set
End Property

Public Sub ProcessData()
For i As Integer = 0 To 100
_data.Add($"key_{i}", $"value_{i}")
Next
End Sub

Public Function GetData() As Dictionary(Of String, Object)
Return New Dictionary(Of String, Object)(_data)
End Function
End Class
`,
			).join("\n") +
			`
End Namespace
`

		const result = await parser.parseFile("large-test.vb", {
			content: largeVbContent,
			fileHash: "large-test-hash",
		})

		// Should have multiple chunks due to size
		expect(result.length).toBeGreaterThan(1)

		// All chunks should be fallback chunks
		result.forEach((block) => {
			expect(block.type).toBe("fallback_chunk")
		})

		// Verify chunks don't exceed max size (MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR)
		result.forEach((block) => {
			expect(block.content.length).toBeLessThanOrEqual(maxChunkChars)
		})
	})

	it("should handle empty VB.NET files", async () => {
		const emptyContent = ""

		const result = await parser.parseFile("empty.vb", {
			content: emptyContent,
			fileHash: "empty-hash",
		})

		// Should return empty array for empty content
		expect(result).toEqual([])
	})

	it("should handle small VB.NET files below minimum chunk size", async () => {
		const smallContent = "Imports System"

		const result = await parser.parseFile("small.vb", {
			content: smallContent,
			fileHash: "small-hash",
		})

		// Should return empty array for content below MIN_BLOCK_CHARS
		expect(result).toEqual([])
	})

	it("should use fallback chunking for other configured fallback extensions", async () => {
		// Test with Scala which is in our fallback list
		const content = `object ScalaExample {
def main(args: Array[String]): Unit = {
println("This is a Scala file that should use fallback chunking")
val numbers = List(1, 2, 3, 4, 5)
val doubled = numbers.map(_ * 2)
println(s"Doubled numbers: $doubled")
}

def factorial(n: Int): Int = {
if (n <= 1) 1
else n * factorial(n - 1)
}
}`

		const result = await parser.parseFile("test.scala", {
			content: content,
			fileHash: "test-hash-scala",
		})

		// Should have results from fallback chunking
		expect(result.length).toBeGreaterThan(0)

		// Check that all blocks are of type 'fallback_chunk'
		result.forEach((block) => {
			expect(block.type).toBe("fallback_chunk")
		})
	})
})
237+
238+
/**
 * Checks the shouldUseFallbackChunking predicate directly: which dot-prefixed
 * extensions it accepts, which it rejects, and that letter case is ignored.
 */
describe("Fallback Extensions Configuration", () => {
	it("should correctly identify extensions that need fallback chunking", () => {
		// Extensions that should use fallback
		const expectedFallback = [".vb", ".scala"]
		// Extensions that should not use fallback (have working parsers)
		const expectedParsed = [".js", ".ts", ".py", ".java", ".cs", ".go", ".rs"]

		for (const extension of expectedFallback) {
			expect(shouldUseFallbackChunking(extension)).toBe(true)
		}

		for (const extension of expectedParsed) {
			expect(shouldUseFallbackChunking(extension)).toBe(false)
		}
	})

	it("should be case-insensitive", () => {
		const mixedCaseVariants = [".VB", ".Vb", ".SCALA", ".Scala"]

		for (const extension of mixedCaseVariants) {
			expect(shouldUseFallbackChunking(extension)).toBe(true)
		}
	})
})

src/services/code-index/processors/parser.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { LanguageParser, loadRequiredLanguageParsers } from "../../tree-sitter/l
66
import { parseMarkdown } from "../../tree-sitter/markdownParser"
77
import { ICodeParser, CodeBlock } from "../interfaces"
88
import { scannerExtensions } from "../shared/supported-extensions"
9+
import { shouldUseFallbackChunking } from "../shared/fallback-extensions"
910
import { MAX_BLOCK_CHARS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../constants"
1011
import { TelemetryService } from "@roo-code/telemetry"
1112
import { TelemetryEventName } from "@roo-code/types"
@@ -101,6 +102,11 @@ export class CodeParser implements ICodeParser {
101102
return this.parseMarkdownContent(filePath, content, fileHash, seenSegmentHashes)
102103
}
103104

105+
// Check if this extension should use fallback chunking
106+
if (shouldUseFallbackChunking(`.${ext}`)) {
107+
return this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes)
108+
}
109+
104110
// Check if we already have the parser loaded
105111
if (!this.loadedParsers[ext]) {
106112
const pendingLoad = this.pendingLoads.get(ext)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
 * Extensions that should always use fallback chunking instead of tree-sitter parsing.
 * These are typically languages that don't have a proper WASM parser available
 * or where the parser doesn't work correctly.
 *
 * Entries are stored lower-case with a leading dot; shouldUseFallbackChunking
 * normalizes its input to that form before looking it up.
 *
 * NOTE: Only extensions that are already in the supported extensions list can be added here.
 * To add support for new file types, they must first be added to the tree-sitter extensions list.
 *
 * HOW TO ADD A NEW FALLBACK EXTENSION:
 * 1. First ensure the extension is in src/services/tree-sitter/index.ts extensions array
 * 2. Add the extension to the fallbackExtensions array below
 * 3. Remove any parser case for this extension from src/services/tree-sitter/languageParser.ts
 * 4. The file will automatically use length-based chunking for indexing
 */
export const fallbackExtensions = [
	".vb", // Visual Basic .NET - no dedicated WASM parser
	".scala", // Scala - removed from parser, uses fallback chunking
]

/**
 * Check if a file extension should use fallback chunking.
 *
 * Matching is case-insensitive. Surrounding whitespace is ignored and a
 * missing leading dot is tolerated, so ".vb", ".VB", "vb" and " .vb " all
 * resolve to the same entry.
 *
 * @param extension File extension, normally including the dot (e.g. ".vb")
 * @returns true if the extension should use fallback chunking
 */
export function shouldUseFallbackChunking(extension: string): boolean {
	const cleaned = extension.trim().toLowerCase()

	// An empty extension can never match a configured entry.
	if (cleaned === "") {
		return false
	}

	// Callers that hold a bare extension ("vb") get the same answer as those
	// passing the dotted form (".vb").
	const normalized = cleaned.startsWith(".") ? cleaned : `.${cleaned}`

	return fallbackExtensions.includes(normalized)
}

0 commit comments

Comments
 (0)