2 changes: 2 additions & 0 deletions src/services/code-index/constants/index.ts
@@ -13,6 +13,8 @@ export const DEFAULT_MAX_SEARCH_RESULTS = CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH
/**File Watcher */
export const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479"
export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
export const MAX_SWIFT_FILE_SIZE_BYTES = 512 * 1024 // 512KB - Swift files can be memory intensive
export const MEMORY_CHECK_INTERVAL_FILES = 10 // Check memory every N files

/**Directory Scanner */
export const MAX_LIST_FILES_LIMIT = 3_000
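Both files below import a MemoryMonitor utility from ../utils/memoryMonitor, which is not part of this excerpt. A minimal sketch of the interface its call sites imply (getInstance, getMemoryUsageMB, isMemoryPressure, checkAndCleanup) might look like the following; the thresholds are illustrative assumptions, not the PR's actual values.

// Hypothetical sketch only: the PR's real implementation lives in
// src/services/code-index/utils/memoryMonitor.ts and is not shown here.
export class MemoryMonitor {
	private static instance: MemoryMonitor
	private static readonly HIGH_MEMORY_MB = 1024 // assumed "high usage" warning threshold
	private static readonly PRESSURE_MB = 1536 // assumed hard-stop threshold

	private constructor() {}

	static getInstance(): MemoryMonitor {
		if (!MemoryMonitor.instance) {
			MemoryMonitor.instance = new MemoryMonitor()
		}
		return MemoryMonitor.instance
	}

	// Heap usage in whole megabytes; heapUsed is a conservative proxy for parser memory
	getMemoryUsageMB(): number {
		return Math.round(process.memoryUsage().heapUsed / (1024 * 1024))
	}

	// True when processing should stop or batches should flush early
	isMemoryPressure(): boolean {
		return this.getMemoryUsageMB() > MemoryMonitor.PRESSURE_MB
	}

	// Returns true when usage is high; triggers GC if Node was started with --expose-gc
	checkAndCleanup(): boolean {
		const high = this.getMemoryUsageMB() > MemoryMonitor.HIGH_MEMORY_MB
		if (high && global.gc) {
			global.gc()
		}
		return high
	}
}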
218 changes: 151 additions & 67 deletions src/services/code-index/processors/parser.ts
@@ -6,7 +6,15 @@ import { LanguageParser, loadRequiredLanguageParsers } from "../../tree-sitter/l
import { parseMarkdown } from "../../tree-sitter/markdownParser"
import { ICodeParser, CodeBlock } from "../interfaces"
import { scannerExtensions } from "../shared/supported-extensions"
import { MAX_BLOCK_CHARS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../constants"
import {
MAX_BLOCK_CHARS,
MIN_BLOCK_CHARS,
MIN_CHUNK_REMAINDER_CHARS,
MAX_CHARS_TOLERANCE_FACTOR,
MAX_SWIFT_FILE_SIZE_BYTES,
MEMORY_CHECK_INTERVAL_FILES,
} from "../constants"
import { MemoryMonitor } from "../utils/memoryMonitor"
import { TelemetryService } from "@roo-code/telemetry"
import { TelemetryEventName } from "@roo-code/types"
import { sanitizeErrorMessage } from "../shared/validation-helpers"
@@ -17,6 +25,8 @@ import { sanitizeErrorMessage } from "../shared/validation-helpers"
export class CodeParser implements ICodeParser {
private loadedParsers: LanguageParser = {}
private pendingLoads: Map<string, Promise<LanguageParser>> = new Map()
private memoryMonitor = MemoryMonitor.getInstance()
private filesProcessed = 0
// Markdown files are now supported using the custom markdown parser
// which extracts headers and sections for semantic indexing

@@ -33,6 +43,17 @@ export class CodeParser implements ICodeParser {
fileHash?: string
},
): Promise<CodeBlock[]> {
// Periodic memory monitoring
this.filesProcessed++
if (this.filesProcessed % MEMORY_CHECK_INTERVAL_FILES === 0) {
const isHighMemory = this.memoryMonitor.checkAndCleanup()
if (isHighMemory) {
console.warn(
`High memory usage detected (${this.memoryMonitor.getMemoryUsageMB()}MB) after processing ${this.filesProcessed} files`,
)
}
}

// Get file extension
const ext = path.extname(filePath).toLowerCase()

@@ -50,6 +71,23 @@
fileHash = options.fileHash || this.createFileHash(content)
} else {
try {
// Check Swift file size before parsing (the file is read once here to measure its byte length)
if (ext === ".swift") {
const stats = await readFile(filePath, "utf8")
.then((content) => ({ size: Buffer.byteLength(content, "utf8") }))
.catch(() => null)
if (stats && stats.size > MAX_SWIFT_FILE_SIZE_BYTES) {
console.warn(
`Skipping large Swift file ${filePath} (${Math.round(stats.size / 1024)}KB > ${Math.round(MAX_SWIFT_FILE_SIZE_BYTES / 1024)}KB limit)`,
)
TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
error: `Swift file too large: ${stats.size} bytes`,
location: "parseFile:fileSizeCheck",
})
return []
}
}

Review comment from @adamhill (Contributor), Jul 15, 2025:
@roomote-agent Why not just truncate the Swift file? It's better to get some of the file indexed rather than none of it.
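A truncation-based variant along the lines the reviewer suggests — a minimal sketch, not part of this PR — could read roughly as follows. It would also avoid reading the file twice (once for the size probe, once for parsing); note that slicing by characters only approximates the byte limit for non-ASCII sources.

// Hypothetical alternative: truncate oversized Swift files instead of skipping
// them, so at least the beginning of the file gets indexed. Not part of this PR.
content = await readFile(filePath, "utf8")
if (ext === ".swift" && Buffer.byteLength(content, "utf8") > MAX_SWIFT_FILE_SIZE_BYTES) {
	console.warn(`Truncating large Swift file ${filePath} to ${MAX_SWIFT_FILE_SIZE_BYTES} bytes for indexing`)
	// Character-based slice approximates the byte limit for mostly-ASCII source
	content = content.slice(0, MAX_SWIFT_FILE_SIZE_BYTES)
}
fileHash = this.createFileHash(content)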

content = await readFile(filePath, "utf8")
fileHash = this.createFileHash(content)
} catch (error) {
@@ -63,6 +101,14 @@
}
}

// Additional memory check before parsing large files
if (content.length > MAX_SWIFT_FILE_SIZE_BYTES && this.memoryMonitor.isMemoryPressure()) {
console.warn(
`Skipping file ${filePath} due to memory pressure (${this.memoryMonitor.getMemoryUsageMB()}MB used)`,
)
return []
}

// Parse the file
return this.parseContent(filePath, content, fileHash)
}
@@ -144,84 +190,122 @@
return []
}

const tree = language.parser.parse(content)
let tree: any = null
let captures: any[] = []

// We don't need to get the query string from languageQueries since it's already loaded
// in the language object
const captures = tree ? language.query.captures(tree.rootNode) : []

// Check if captures are empty
if (captures.length === 0) {
if (content.length >= MIN_BLOCK_CHARS) {
// Perform fallback chunking if content is large enough
const blocks = this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes)
return blocks
} else {
// Return empty if content is too small for fallback
try {
// Check memory before parsing
if (this.memoryMonitor.isMemoryPressure()) {
console.warn(`Skipping parsing ${filePath} due to memory pressure`)
return []
}
}

const results: CodeBlock[] = []
tree = language.parser.parse(content)

// Process captures if not empty
const queue: Node[] = Array.from(captures).map((capture) => capture.node)
// We don't need to get the query string from languageQueries since it's already loaded
// in the language object
captures = tree ? language.query.captures(tree.rootNode) : []

while (queue.length > 0) {
const currentNode = queue.shift()!
// const lineSpan = currentNode.endPosition.row - currentNode.startPosition.row + 1 // Removed as per lint error
// Check if captures are empty
if (captures.length === 0) {
if (content.length >= MIN_BLOCK_CHARS) {
// Perform fallback chunking if content is large enough
const blocks = this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes)
return blocks
} else {
// Return empty if content is too small for fallback
return []
}
}

const results: CodeBlock[] = []

// Process captures if not empty
const queue: Node[] = Array.from(captures).map((capture) => capture.node)
let processedNodes = 0
const maxNodesToProcess = 1000 // Limit to prevent excessive memory usage

while (queue.length > 0 && processedNodes < maxNodesToProcess) {
// Periodic memory check during processing
if (processedNodes % 100 === 0 && this.memoryMonitor.isMemoryPressure()) {
console.warn(
`Stopping node processing for ${filePath} due to memory pressure after ${processedNodes} nodes`,
)
break
}

// Check if the node meets the minimum character requirement
if (currentNode.text.length >= MIN_BLOCK_CHARS) {
// If it also exceeds the maximum character limit, try to break it down
if (currentNode.text.length > MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR) {
if (currentNode.children.filter((child) => child !== null).length > 0) {
// If it has children, process them instead
queue.push(...currentNode.children.filter((child) => child !== null))
const currentNode = queue.shift()!
processedNodes++

// Check if the node meets the minimum character requirement
if (currentNode.text.length >= MIN_BLOCK_CHARS) {
// If it also exceeds the maximum character limit, try to break it down
if (currentNode.text.length > MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR) {
if (currentNode.children.filter((child) => child !== null).length > 0) {
// If it has children, process them instead (but limit queue growth)
const validChildren = currentNode.children.filter((child) => child !== null)
if (queue.length + validChildren.length < maxNodesToProcess) {
queue.push(...validChildren)
}
} else {
// If it's a leaf node, chunk it
const chunkedBlocks = this._chunkLeafNodeByLines(
currentNode,
filePath,
fileHash,
seenSegmentHashes,
)
results.push(...chunkedBlocks)
}
} else {
// If it's a leaf node, chunk it
const chunkedBlocks = this._chunkLeafNodeByLines(
currentNode,
filePath,
fileHash,
seenSegmentHashes,
)
results.push(...chunkedBlocks)
}
} else {
// Node meets min chars and is within max chars, create a block
const identifier =
currentNode.childForFieldName("name")?.text ||
currentNode.children.find((c) => c?.type === "identifier")?.text ||
null
const type = currentNode.type
const start_line = currentNode.startPosition.row + 1
const end_line = currentNode.endPosition.row + 1
const content = currentNode.text
const contentPreview = content.slice(0, 100)
const segmentHash = createHash("sha256")
.update(`${filePath}-${start_line}-${end_line}-${content.length}-${contentPreview}`)
.digest("hex")

if (!seenSegmentHashes.has(segmentHash)) {
seenSegmentHashes.add(segmentHash)
results.push({
file_path: filePath,
identifier,
type,
start_line,
end_line,
content,
segmentHash,
fileHash,
})
// Node meets min chars and is within max chars, create a block
const identifier =
currentNode.childForFieldName("name")?.text ||
currentNode.children.find((c) => c?.type === "identifier")?.text ||
null
const type = currentNode.type
const start_line = currentNode.startPosition.row + 1
const end_line = currentNode.endPosition.row + 1
const nodeContent = currentNode.text
const contentPreview = nodeContent.slice(0, 100)
const segmentHash = createHash("sha256")
.update(`${filePath}-${start_line}-${end_line}-${nodeContent.length}-${contentPreview}`)
.digest("hex")

if (!seenSegmentHashes.has(segmentHash)) {
seenSegmentHashes.add(segmentHash)
results.push({
file_path: filePath,
identifier,
type,
start_line,
end_line,
content: nodeContent,
segmentHash,
fileHash,
})
}
}
}
// Nodes smaller than minBlockChars are ignored
}
// Nodes smaller than minBlockChars are ignored
}

return results
return results
} finally {
// Clean up tree-sitter resources
if (tree) {
try {
tree.delete?.()
} catch (e) {
// Ignore cleanup errors
}
}

// Force garbage collection for Swift files if available
// (global.gc is only defined when Node runs with --expose-gc)
if (ext === ".swift" && global.gc) {
global.gc()
}
}
}

/**
71 changes: 67 additions & 4 deletions src/services/code-index/processors/scanner.ts
@@ -17,19 +17,25 @@ import { t } from "../../../i18n"
import {
QDRANT_CODE_BLOCK_NAMESPACE,
MAX_FILE_SIZE_BYTES,
MAX_SWIFT_FILE_SIZE_BYTES,
MAX_LIST_FILES_LIMIT,
BATCH_SEGMENT_THRESHOLD,
MAX_BATCH_RETRIES,
INITIAL_RETRY_DELAY_MS,
PARSING_CONCURRENCY,
BATCH_PROCESSING_CONCURRENCY,
MEMORY_CHECK_INTERVAL_FILES,
} from "../constants"
import { MemoryMonitor } from "../utils/memoryMonitor"
import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
import { TelemetryService } from "@roo-code/telemetry"
import { TelemetryEventName } from "@roo-code/types"
import { sanitizeErrorMessage } from "../shared/validation-helpers"

export class DirectoryScanner implements IDirectoryScanner {
private memoryMonitor = MemoryMonitor.getInstance()
private filesProcessed = 0

constructor(
private readonly embedder: IEmbedder,
private readonly qdrantClient: IVectorStore,
@@ -107,9 +113,35 @@ export class DirectoryScanner implements IDirectoryScanner {
const parsePromises = supportedPaths.map((filePath) =>
parseLimiter(async () => {
try {
// Check file size
// Periodic memory monitoring
this.filesProcessed++
if (this.filesProcessed % MEMORY_CHECK_INTERVAL_FILES === 0) {
const isHighMemory = this.memoryMonitor.checkAndCleanup()
if (isHighMemory) {
console.warn(
`High memory usage detected (${this.memoryMonitor.getMemoryUsageMB()}MB) during directory scan after ${this.filesProcessed} files`,
)
}
}

// Check if memory pressure should stop processing
if (this.memoryMonitor.isMemoryPressure()) {
console.warn(
`Skipping file ${filePath} due to memory pressure (${this.memoryMonitor.getMemoryUsageMB()}MB used)`,
)
skippedCount++
return
}

// Check file size with Swift-specific limits
const stats = await stat(filePath)
if (stats.size > MAX_FILE_SIZE_BYTES) {
const ext = path.extname(filePath).toLowerCase()
const maxSize = ext === ".swift" ? MAX_SWIFT_FILE_SIZE_BYTES : MAX_FILE_SIZE_BYTES

if (stats.size > maxSize) {
console.warn(
`Skipping large ${ext} file ${filePath} (${Math.round(stats.size / 1024)}KB > ${Math.round(maxSize / 1024)}KB limit)`,
)
skippedCount++ // Skip large files
return
}
@@ -148,6 +180,34 @@
const release = await mutex.acquire()
totalBlockCount += fileBlockCount
try {
// Check memory before adding to batch
if (this.memoryMonitor.isMemoryPressure()) {
console.warn(
`Memory pressure detected, forcing batch processing early (${currentBatchBlocks.length} blocks)`,
)
// Force process current batch before adding more
if (currentBatchBlocks.length > 0) {
const batchBlocks = [...currentBatchBlocks]
const batchTexts = [...currentBatchTexts]
const batchFileInfos = [...currentBatchFileInfos]
currentBatchBlocks = []
currentBatchTexts = []
currentBatchFileInfos = []

const batchPromise = batchLimiter(() =>
this.processBatch(
batchBlocks,
batchTexts,
batchFileInfos,
scanWorkspace,
onError,
onBlocksIndexed,
),
)
activeBatchPromises.push(batchPromise)
}
}

currentBatchBlocks.push(block)
currentBatchTexts.push(trimmedContent)
addedBlocksFromFile = true
@@ -160,8 +220,11 @@
})
}

// Check if batch threshold is met
if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
// Check if batch threshold is met or memory pressure
if (
currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD ||
this.memoryMonitor.isMemoryPressure()
) {
// Copy current batch data and clear accumulators
const batchBlocks = [...currentBatchBlocks]
const batchTexts = [...currentBatchTexts]