Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 263 additions & 0 deletions src/services/code-index/processors/__tests__/parser.vb.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
import { describe, it, expect, beforeEach, vi } from "vitest"
import { CodeParser } from "../parser"
import * as path from "path"

// Mock TelemetryService: swap the module for a stub that exposes only
// `TelemetryService.instance.captureEvent`, implemented as a `vi.fn()` spy.
// NOTE(review): parser.ts imports TelemetryService from "@roo-code/telemetry";
// confirm this relative path resolves to that same module so the stub is
// actually the one the parser sees.
vi.mock("../../../../../packages/telemetry/src/TelemetryService", () => ({
TelemetryService: {
instance: {
captureEvent: vi.fn(),
},
},
}))

import { shouldUseFallbackChunking } from "../../shared/supported-extensions"

describe("CodeParser - VB.NET and Fallback Extensions Support", () => {
	// A new CodeParser is constructed before every test case.
	let parser: CodeParser

	beforeEach(() => {
		parser = new CodeParser()
	})

	it("should use fallback chunking for VB.NET files", async () => {
		// Precondition: ".vb" must be registered as a fallback extension.
		expect(shouldUseFallbackChunking(".vb")).toBe(true)

		// Fixture: a small but realistic VB.NET program (leading/trailing newline trimmed).
		const vbSource = `
Imports System
Imports System.Collections.Generic
Imports System.Linq

Namespace MyApplication
Public Class Calculator
Private _history As New List(Of String)()

Public Function Add(a As Integer, b As Integer) As Integer
Dim result As Integer = a + b
_history.Add($"{a} + {b} = {result}")
Return result
End Function

Public Function Subtract(a As Integer, b As Integer) As Integer
Dim result As Integer = a - b
_history.Add($"{a} - {b} = {result}")
Return result
End Function

Public Function Multiply(a As Integer, b As Integer) As Integer
Dim result As Integer = a * b
_history.Add($"{a} * {b} = {result}")
Return result
End Function

Public Function Divide(a As Integer, b As Integer) As Double
If b = 0 Then
Throw New DivideByZeroException("Cannot divide by zero")
End If
Dim result As Double = CDbl(a) / CDbl(b)
_history.Add($"{a} / {b} = {result}")
Return result
End Function

Public Function GetHistory() As List(Of String)
Return New List(Of String)(_history)
End Function

Public Sub ClearHistory()
_history.Clear()
End Sub
End Class

Public Module Program
Sub Main(args As String())
Dim calc As New Calculator()

Console.WriteLine("Calculator Demo")
Console.WriteLine("===============")

Console.WriteLine($"10 + 5 = {calc.Add(10, 5)}")
Console.WriteLine($"10 - 5 = {calc.Subtract(10, 5)}")
Console.WriteLine($"10 * 5 = {calc.Multiply(10, 5)}")
Console.WriteLine($"10 / 5 = {calc.Divide(10, 5)}")

Console.WriteLine()
Console.WriteLine("History:")
For Each entry In calc.GetHistory()
Console.WriteLine($" {entry}")
Next
End Sub
End Module
End Namespace
`.trim()

		const blocks = await parser.parseFile("test.vb", {
			content: vbSource,
			fileHash: "test-hash",
		})

		// The fallback path must yield at least one block.
		expect(blocks.length).toBeGreaterThan(0)

		// Every block must be tagged as a fallback chunk.
		for (const block of blocks) {
			expect(block.type).toBe("fallback_chunk")
		}

		// Joining the chunks with newlines must reproduce the input exactly.
		const rejoined = blocks.map(({ content }) => content).join("\n")
		expect(rejoined).toBe(vbSource)

		// The reported path is the one handed to parseFile.
		expect(blocks[0].file_path).toBe("test.vb")
	})

	it("should handle large VB.NET files with proper chunking", async () => {
		// The large fixture is assembled from three pieces: preamble, 50 generated classes, epilogue.
		const preamble = `
Imports System
Imports System.Collections.Generic

Namespace LargeApplication
`

		// Renders one VB.NET class; only the class name, id and display name vary with `n`.
		const renderClass = (n: number): string => `
Public Class TestClass${n}
Private _id As Integer = ${n}
Private _name As String = "Class ${n}"
Private _data As New Dictionary(Of String, Object)()

Public Property Id As Integer
Get
Return _id
End Get
Set(value As Integer)
_id = value
End Set
End Property

Public Property Name As String
Get
Return _name
End Get
Set(value As String)
_name = value
End Set
End Property

Public Sub ProcessData()
For i As Integer = 0 To 100
_data.Add($"key_{i}", $"value_{i}")
Next
End Sub

Public Function GetData() As Dictionary(Of String, Object)
Return New Dictionary(Of String, Object)(_data)
End Function
End Class
`

		const epilogue = `
End Namespace
`

		const generatedClasses = Array.from({ length: 50 }, (_, n) => renderClass(n))
		const largeVbSource = preamble + generatedClasses.join("\n") + epilogue

		const blocks = await parser.parseFile("large-test.vb", {
			content: largeVbSource,
			fileHash: "large-test-hash",
		})

		// A file this size is expected to be split into more than one chunk.
		expect(blocks.length).toBeGreaterThan(1)

		// Every chunk must come from the fallback path.
		for (const block of blocks) {
			expect(block.type).toBe("fallback_chunk")
		}

		// Upper bound on chunk size.
		// NOTE(review): the literal was documented as MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR;
		// confirm 150000 really equals that product, otherwise this check may be far too loose.
		for (const block of blocks) {
			expect(block.content.length).toBeLessThanOrEqual(150000)
		}
	})

	it("should handle empty VB.NET files", async () => {
		const blocks = await parser.parseFile("empty.vb", {
			content: "",
			fileHash: "empty-hash",
		})

		// Empty input produces no blocks at all.
		expect(blocks).toEqual([])
	})

	it("should handle small VB.NET files below minimum chunk size", async () => {
		const tinySource = "Imports System"

		const blocks = await parser.parseFile("small.vb", {
			content: tinySource,
			fileHash: "small-hash",
		})

		// Content shorter than MIN_BLOCK_CHARS is expected to be dropped entirely.
		expect(blocks).toEqual([])
	})

	it("should use fallback chunking for other configured fallback extensions", async () => {
		// Scala is the other extension present in the fallback list.
		const scalaSource = `object ScalaExample {
def main(args: Array[String]): Unit = {
println("This is a Scala file that should use fallback chunking")
val numbers = List(1, 2, 3, 4, 5)
val doubled = numbers.map(_ * 2)
println(s"Doubled numbers: $doubled")
}

def factorial(n: Int): Int = {
if (n <= 1) 1
else n * factorial(n - 1)
}
}`

		const blocks = await parser.parseFile("test.scala", {
			content: scalaSource,
			fileHash: "test-hash-scala",
		})

		// The fallback path must yield at least one block.
		expect(blocks.length).toBeGreaterThan(0)

		// And each of them must be tagged as a fallback chunk.
		for (const block of blocks) {
			expect(block.type).toBe("fallback_chunk")
		}
	})
})

describe("Fallback Extensions Configuration", () => {
	it("should correctly identify extensions that need fallback chunking", () => {
		// Extensions routed to fallback chunking.
		const fallbackOnly = [".vb", ".scala"]
		// Extensions that keep using their regular parsers.
		const parserBacked = [".js", ".ts", ".py", ".java", ".cs", ".go", ".rs"]

		for (const ext of fallbackOnly) {
			expect(shouldUseFallbackChunking(ext)).toBe(true)
		}

		for (const ext of parserBacked) {
			expect(shouldUseFallbackChunking(ext)).toBe(false)
		}
	})

	it("should be case-insensitive", () => {
		// Upper-case and mixed-case spellings must match the lower-case entries.
		const mixedCaseSpellings = [".VB", ".Vb", ".SCALA", ".Scala"]

		for (const ext of mixedCaseSpellings) {
			expect(shouldUseFallbackChunking(ext)).toBe(true)
		}
	})
})
7 changes: 6 additions & 1 deletion src/services/code-index/processors/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { Node } from "web-tree-sitter"
import { LanguageParser, loadRequiredLanguageParsers } from "../../tree-sitter/languageParser"
import { parseMarkdown } from "../../tree-sitter/markdownParser"
import { ICodeParser, CodeBlock } from "../interfaces"
import { scannerExtensions } from "../shared/supported-extensions"
import { scannerExtensions, shouldUseFallbackChunking } from "../shared/supported-extensions"
import { MAX_BLOCK_CHARS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../constants"
import { TelemetryService } from "@roo-code/telemetry"
import { TelemetryEventName } from "@roo-code/types"
Expand Down Expand Up @@ -101,6 +101,11 @@ export class CodeParser implements ICodeParser {
return this.parseMarkdownContent(filePath, content, fileHash, seenSegmentHashes)
}

// Check if this extension should use fallback chunking
if (shouldUseFallbackChunking(`.${ext}`)) {
return this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes)
}

// Check if we already have the parser loaded
if (!this.loadedParsers[ext]) {
const pendingLoad = this.pendingLoads.get(ext)
Expand Down
29 changes: 29 additions & 0 deletions src/services/code-index/shared/supported-extensions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,32 @@ import { extensions as allExtensions } from "../../tree-sitter"

// Include all extensions including markdown for the scanner
export const scannerExtensions = allExtensions

/**
 * File extensions that bypass tree-sitter parsing and are always indexed with
 * fallback (length-based) chunking. Use this for languages that lack a usable
 * WASM parser, or whose parser does not behave correctly.
 *
 * Adding an entry:
 * 1. Make sure the extension is already listed in the `extensions` array of
 *    src/services/tree-sitter/index.ts. Only extensions in the supported
 *    list may appear here.
 * 2. Append it (lower-case, with the leading dot) to the array below.
 * 3. Files with that extension are then chunked by length during indexing.
 *
 * Do NOT delete the corresponding parser cases from languageParser.ts; they
 * may still be used elsewhere.
 */
export const fallbackExtensions = [
	// Visual Basic .NET - no dedicated WASM parser
	".vb",
	// Scala - uses fallback chunking instead of Lua query workaround
	".scala",
]

/**
 * Tells whether files with the given extension should use fallback chunking.
 *
 * The comparison is case-insensitive: the argument is lower-cased before it
 * is matched against `fallbackExtensions`.
 *
 * @param extension File extension, including the leading dot (e.g. ".vb")
 * @returns true when the extension is listed in `fallbackExtensions`
 */
export function shouldUseFallbackChunking(extension: string): boolean {
	const normalized = extension.toLowerCase()
	return fallbackExtensions.some((candidate) => candidate === normalized)
}
2 changes: 2 additions & 0 deletions src/services/tree-sitter/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ const extensions = [
// Embedded Template
"ejs",
"erb",
// Visual Basic .NET
"vb",
].map((e) => `.${e}`)

export { extensions }
Expand Down
Loading