RooCodeInc
diff --git a/‎src/services/code-index/__tests__/go-indexing-fix.spec.ts‎
Lines changed: 211 additions & 0 deletions b/‎src/services/code-index/__tests__/go-indexing-fix.spec.ts‎
Lines changed: 211 additions & 0 deletions
diff --git a/‎src/services/code-index/processors/parser.ts‎
Lines changed: 38 additions & 11 deletions b/‎src/services/code-index/processors/parser.ts‎
Lines changed: 38 additions & 11 deletions
@@ -0,0 +1,211 @@
+import { describe, it, expect, beforeAll, vi } from "vitest"
+import { CodeParser } from "../processors/parser"
+import * as languageParserModule from "../../tree-sitter/languageParser"
+import * as path from "path"
+
+describe("Go Indexing Fix", () => {
+	let wasmDir: string | undefined
+
+	/**
+	 * Locates the directory that holds the tree-sitter Go grammar and routes every
+	 * loadRequiredLanguageParsers call through it, so the parser can load the WASM
+	 * file when the caller does not supply a directory.
+	 */
+	beforeAll(async () => {
+		// Candidate build-output directories, checked in order
+		const possibleWasmDirs = [path.join(__dirname, "../../../dist"), path.join(process.cwd(), "dist")]
+
+		// Resolve fs once instead of on every iteration. existsSync reports a missing or
+		// unreadable path as false rather than throwing, so the probe needs no try/catch.
+		const fsSync = require("fs")
+		wasmDir = possibleWasmDirs.find((dir) => fsSync.existsSync(path.join(dir, "tree-sitter-go.wasm")))
+
+		if (!wasmDir) {
+			// Name the searched locations so a missing build output is easy to diagnose
+			throw new Error(`Could not find WASM directory (searched: ${possibleWasmDirs.join(", ")})`)
+		}
+
+		// Wrap the real loader: keep an explicitly passed directory, otherwise use the discovered one
+		const originalLoad = languageParserModule.loadRequiredLanguageParsers
+		vi.spyOn(languageParserModule, "loadRequiredLanguageParsers").mockImplementation(
+			async (files: string[], customWasmDir?: string) => {
+				return originalLoad(files, customWasmDir || wasmDir)
+			},
+		)
+	})
+
+	/**
+	 * Parses a representative Go file and checks that function, method and type
+	 * declarations are all emitted as blocks with the expected identifiers,
+	 * content and line ranges.
+	 */
+	it("should correctly index Go functions, methods, and types", async () => {
+		const parser = new CodeParser()
+
+		// NOTE: this is a JS template literal, so the Go newline escape below is written
+		// as \\n; a bare \n would insert a real line break and split the Go string literal.
+		const goContent = `package main
+
+import (
+    "fmt"
+    "strings"
+)
+
+// User represents a user in the system
+type User struct {
+    ID       int
+    Name     string
+    Email    string
+    IsActive bool
+}
+
+// NewUser creates a new user instance
+func NewUser(id int, name, email string) *User {
+    return &User{
+        ID:       id,
+        Name:     name,
+        Email:    email,
+        IsActive: true,
+    }
+}
+
+// GetDisplayName returns the user's display name
+func (u *User) GetDisplayName() string {
+    return fmt.Sprintf("%s <%s>", u.Name, u.Email)
+}
+
+// Validate checks if the user data is valid
+func (u *User) Validate() error {
+    if u.Name == "" {
+        return fmt.Errorf("name cannot be empty")
+    }
+    if !strings.Contains(u.Email, "@") {
+        return fmt.Errorf("invalid email format")
+    }
+    return nil
+}
+
+// ProcessUsers processes a list of users
+func ProcessUsers(users []*User) {
+    for _, user := range users {
+        if err := user.Validate(); err != nil {
+            fmt.Printf("Invalid user %d: %v\\n", user.ID, err)
+            continue
+        }
+        fmt.Println(user.GetDisplayName())
+    }
+}
+
+func main() {
+    users := []*User{
+        NewUser(1, "Alice", "alice@example.com"),
+        NewUser(2, "Bob", "bob@example.com"),
+    }
+    ProcessUsers(users)
+}`
+
+		const blocks = await parser.parseFile("test.go", {
+			content: goContent,
+			fileHash: "test-hash",
+		})
+
+		// Verify we got blocks
+		expect(blocks.length).toBeGreaterThan(0)
+
+		// Check for specific function declarations (toContain is order-independent, so no sorting)
+		const functionBlocks = blocks.filter((b) => b.type === "function_declaration")
+		const functionNames = functionBlocks.map((b) => b.identifier)
+		expect(functionNames).toContain("NewUser")
+		expect(functionNames).toContain("ProcessUsers")
+		// Note: main function might be filtered out if it's less than 50 characters
+
+		// Check for method declarations
+		const methodBlocks = blocks.filter((b) => b.type === "method_declaration")
+		const methodNames = methodBlocks.map((b) => b.identifier)
+		expect(methodNames).toContain("GetDisplayName")
+		expect(methodNames).toContain("Validate")
+
+		// Check for type declarations
+		const typeBlocks = blocks.filter((b) => b.type === "type_declaration")
+		expect(typeBlocks.length).toBeGreaterThan(0)
+
+		// Verify content is captured correctly
+		const newUserBlock = functionBlocks.find((b) => b.identifier === "NewUser")
+		expect(newUserBlock).toBeDefined()
+		expect(newUserBlock!.content).toContain("func NewUser")
+		expect(newUserBlock!.content).toContain("return &User{")
+
+		// Verify line numbers are plausible: Validate starts after line 1 and spans several lines
+		const validateBlock = methodBlocks.find((b) => b.identifier === "Validate")
+		expect(validateBlock).toBeDefined()
+		expect(validateBlock!.start_line).toBeGreaterThan(1)
+		expect(validateBlock!.end_line).toBeGreaterThan(validateBlock!.start_line)
+	})
+
+	/**
+	 * Feeds the parser one Go function below the 50-character minimum and one
+	 * above it, and expects only the larger function to be reported.
+	 */
+	it("should respect the 50-character threshold for Go", async () => {
+		const source = `package main
+
+// Short function - should be filtered out
+func f() {
+    return
+}
+
+// Longer function - should be included
+func calculateTotal(items []int) int {
+    total := 0
+    for _, item := range items {
+        total += item
+    }
+    return total
+}`
+
+		const codeParser = new CodeParser()
+		const parsed = await codeParser.parseFile("test.go", { content: source, fileHash: "test-hash" })
+
+		// Exactly one function declaration may survive the size filter
+		const functions = parsed.filter((block) => block.type === "function_declaration")
+		expect(functions.length).toBe(1)
+		expect(functions[0].identifier).toBe("calculateTotal")
+
+		// The tiny function must not appear among the results
+		expect(functions.find((block) => block.identifier === "f")).toBeUndefined()
+	})
+
+	/**
+	 * Checks that emitted blocks carry the whole declaration text (struct fields,
+	 * method signature and body) rather than only the declared name.
+	 */
+	it("should capture full declaration content, not just identifiers", async () => {
+		const parser = new CodeParser()
+
+		const goContent = `package main
+
+type Config struct {
+    Host     string
+    Port     int
+    Debug    bool
+    Timeout  int
+}
+
+func (c *Config) GetAddress() string {
+    return fmt.Sprintf("%s:%d", c.Host, c.Port)
+}`
+
+		const blocks = await parser.parseFile("test.go", {
+			content: goContent,
+			fileHash: "test-hash",
+		})
+
+		// Check that we capture the full struct declaration.
+		// Assert the block exists first: guarding the expectations with `if` would let
+		// the test pass without checking anything when no type block is produced.
+		const typeBlock = blocks.find((b) => b.type === "type_declaration")
+		expect(typeBlock).toBeDefined()
+		expect(typeBlock!.content).toContain("type Config struct")
+		expect(typeBlock!.content).toContain("Host     string")
+		expect(typeBlock!.content).toContain("Port     int")
+		expect(typeBlock!.content).toContain("Debug    bool")
+		expect(typeBlock!.content).toContain("Timeout  int")
+
+		// Check that we capture the full method declaration
+		const methodBlock = blocks.find((b) => b.type === "method_declaration" && b.identifier === "GetAddress")
+		expect(methodBlock).toBeDefined()
+		expect(methodBlock!.content).toContain("func (c *Config) GetAddress() string")
+		expect(methodBlock!.content).toContain("return fmt.Sprintf")
+	})
+})
@@ -7,6 +7,14 @@ import { ICodeParser, CodeBlock } from "../interfaces"
 import { scannerExtensions } from "../shared/supported-extensions"
 import { MAX_BLOCK_CHARS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../constants"
 
+/**
+ * Language-specific minimum block character thresholds.
+ *
+ * Keys are the lowercase file extension the parser derives from the file path
+ * (for example "go"); the `default` entry applies to every extension without
+ * its own entry. Typed as read-only so the shared table cannot be modified
+ * by accident.
+ */
+const LANGUAGE_THRESHOLDS: Readonly<Record<string, number>> = {
+	go: 50, // Go has concise syntax
+	default: MIN_BLOCK_CHARS, // Default for other languages (100)
+}
+
 /**
  * Implementation of the code parser interface
  */
@@ -67,6 +75,15 @@ export class CodeParser implements ICodeParser {
 		return scannerExtensions.includes(extension)
 	}
 
+	/**
+	 * Gets the minimum block character threshold for a language
+	 *
+	 * Only entries defined directly on LANGUAGE_THRESHOLDS count: a plain property
+	 * read would also resolve inherited object members such as "constructor" or
+	 * "toString" and return a non-numeric value. A configured threshold of 0 is
+	 * honored rather than being replaced by the default.
+	 * @param language Language identifier
+	 * @returns Minimum character threshold
+	 */
+	private getMinBlockChars(language: string): number {
+		const hasOwnEntry = Object.prototype.hasOwnProperty.call(LANGUAGE_THRESHOLDS, language)
+		const threshold = hasOwnEntry ? LANGUAGE_THRESHOLDS[language] : undefined
+		return threshold ?? LANGUAGE_THRESHOLDS.default
+	}
+
 	/**
 	 * Creates a hash for a file
 	 * @param content File content
@@ -86,6 +103,7 @@ export class CodeParser implements ICodeParser {
 	private async parseContent(filePath: string, content: string, fileHash: string): Promise<CodeBlock[]> {
 		const ext = path.extname(filePath).slice(1).toLowerCase()
 		const seenSegmentHashes = new Set<string>()
+		const minBlockChars = this.getMinBlockChars(ext)
 
 		// Check if we already have the parser loaded
 		if (!this.loadedParsers[ext]) {
@@ -128,9 +146,15 @@ export class CodeParser implements ICodeParser {
 
 		// Check if captures are empty
 		if (captures.length === 0) {
-			if (content.length >= MIN_BLOCK_CHARS) {
+			if (content.length >= minBlockChars) {
 				// Perform fallback chunking if content is large enough
-				const blocks = this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes)
+				const blocks = this._performFallbackChunking(
+					filePath,
+					content,
+					fileHash,
+					seenSegmentHashes,
+					minBlockChars,
+				)
 				return blocks
 			} else {
 				// Return empty if content is too small for fallback
@@ -148,20 +172,20 @@ export class CodeParser implements ICodeParser {
 			// const lineSpan = currentNode.endPosition.row - currentNode.startPosition.row + 1 // Removed as per lint error
 
 			// Check if the node meets the minimum character requirement
-			if (currentNode.text.length >= MIN_BLOCK_CHARS) {
+			if (currentNode.text.length >= minBlockChars) {
 				// If it also exceeds the maximum character limit, try to break it down
 				if (currentNode.text.length > MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR) {
 					if (currentNode.children.filter((child) => child !== null).length > 0) {
 						// If it has children, process them instead
 						queue.push(...currentNode.children.filter((child) => child !== null))
 					} else {
-						// If it's a leaf node, chunk it (passing MIN_BLOCK_CHARS as per Task 1 Step 5)
-						// Note: _chunkLeafNodeByLines logic might need further adjustment later
+						// If it's a leaf node, chunk it
 						const chunkedBlocks = this._chunkLeafNodeByLines(
 							currentNode,
 							filePath,
 							fileHash,
 							seenSegmentHashes,
+							minBlockChars,
 						)
 						results.push(...chunkedBlocks)
 					}
@@ -194,7 +218,7 @@ export class CodeParser implements ICodeParser {
 					}
 				}
 			}
-			// Nodes smaller than MIN_BLOCK_CHARS are ignored
+			// Nodes smaller than minBlockChars are ignored
 		}
 
 		return results
@@ -207,9 +231,9 @@ export class CodeParser implements ICodeParser {
 		lines: string[],
 		filePath: string,
 		fileHash: string,
-
 		chunkType: string,
 		seenSegmentHashes: Set<string>,
+		minBlockChars: number,
 		baseStartLine: number = 1, // 1-based start line of the *first* line in the `lines` array
 	): CodeBlock[] {
 		const chunks: CodeBlock[] = []
@@ -219,7 +243,7 @@ export class CodeParser implements ICodeParser {
 		const effectiveMaxChars = MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR
 
 		const finalizeChunk = (endLineIndex: number) => {
-			if (currentChunkLength >= MIN_BLOCK_CHARS && currentChunkLines.length > 0) {
+			if (currentChunkLength >= minBlockChars && currentChunkLines.length > 0) {
 				const chunkContent = currentChunkLines.join("\n")
 				const startLine = baseStartLine + chunkStartLineIndex
 				const endLine = baseStartLine + endLineIndex
@@ -300,7 +324,7 @@ export class CodeParser implements ICodeParser {
 				}
 
 				if (
-					currentChunkLength >= MIN_BLOCK_CHARS &&
+					currentChunkLength >= minBlockChars &&
 					remainderLength < MIN_CHUNK_REMAINDER_CHARS &&
 					currentChunkLines.length > 1
 				) {
@@ -311,7 +335,7 @@ export class CodeParser implements ICodeParser {
 						const potentialNextChunkLength = potentialNextChunkLines.join("\n").length + 1
 
 						if (
-							potentialChunkLength >= MIN_BLOCK_CHARS &&
+							potentialChunkLength >= minBlockChars &&
 							potentialNextChunkLength >= MIN_CHUNK_REMAINDER_CHARS
 						) {
 							splitIndex = k
@@ -348,16 +372,18 @@ export class CodeParser implements ICodeParser {
 		content: string,
 		fileHash: string,
 		seenSegmentHashes: Set<string>,
+		minBlockChars: number,
 	): CodeBlock[] {
 		const lines = content.split("\n")
-		return this._chunkTextByLines(lines, filePath, fileHash, "fallback_chunk", seenSegmentHashes)
+		return this._chunkTextByLines(lines, filePath, fileHash, "fallback_chunk", seenSegmentHashes, minBlockChars)
 	}
 
 	private _chunkLeafNodeByLines(
 		node: Node,
 		filePath: string,
 		fileHash: string,
 		seenSegmentHashes: Set<string>,
+		minBlockChars: number,
 	): CodeBlock[] {
 		const lines = node.text.split("\n")
 		const baseStartLine = node.startPosition.row + 1
@@ -367,6 +393,7 @@ export class CodeParser implements ICodeParser {
 			fileHash,
 			node.type, // Use the node's type
 			seenSegmentHashes,
+			minBlockChars,
 			baseStartLine,
 		)
 	}