/**
 * Worker-thread entry point that decides which workspace files get indexed.
 *
 * Messages accepted from the parent thread:
 *   - `init`         data `{ maxFileSizeMB, maxIndexSizeMB }`: stores the size limits and
 *                    replies `{ type: 'ready' }`.
 *   - `processBatch` data `{ files, fileExtensions }`: filters one batch of paths and replies
 *                    `{ type: 'batchComplete' }`, or a `result` with `reachedLimit: true` as
 *                    soon as a file no longer fits into the remaining index budget.
 *   - `complete`     replies with a `result` with `reachedLimit: false`.
 *
 * A `result` reply carries `{ files, filesExceedingMaxSize, reachedLimit }`.
 * Every failure, including an unknown message type, is reported as `{ type: 'error', error }`.
 */
const { parentPort } = require('worker_threads')
const fs = require('fs')

const MB_TO_BYTES = 1024 * 1024

// Paths accepted so far; shared across batches so duplicates are only counted once.
const uniqueFiles = new Set()
// Number of candidate files rejected because their size is not below `maxFileSize`.
let filesExceedingMaxSize = 0
// Both limits are in bytes and stay undefined until an `init` message has been handled.
let maxFileSize
let remainingIndexSize

/**
 * Returns the lower-cased text after the last '.' of `filepath`.
 *
 * Returns '' when the value is empty, contains no '.', ends with '.', or starts with '.'
 * and contains no further '.' (for example '.gitignore'). The whole string is inspected,
 * not just its last path segment.
 *
 * @param {string} filepath path or file name to inspect
 * @returns {string} the extension without the leading dot, or ''
 */
function getFileExtensionName(filepath) {
    if (!filepath || !filepath.includes('.') || filepath.endsWith('.')) {
        return ''
    }
    if (filepath.startsWith('.') && filepath.indexOf('.', 1) === -1) {
        return ''
    }
    return filepath.substring(filepath.lastIndexOf('.') + 1).toLowerCase()
}

/**
 * Posts the files collected so far as a `result` message.
 *
 * @param {{ postMessage: Function }} port channel used to answer the parent thread
 * @param {boolean} reachedLimit true when collection stopped because the index budget ran out
 */
function postResult(port, reachedLimit) {
    port.postMessage({
        type: 'result',
        data: {
            files: [...uniqueFiles],
            filesExceedingMaxSize,
            reachedLimit,
        },
    })
}

/**
 * Handles a single protocol message and answers on `port`.
 *
 * Never throws: anything that goes wrong is turned into an `error` reply so the parent
 * thread can react to it.
 *
 * @param {{ type: string, data?: object }} message message received from the parent thread
 * @param {{ postMessage: Function }} port channel used to answer the parent thread
 */
function handleMessage(message, port) {
    try {
        const { type, data } = message

        if (type === 'init') {
            const { maxFileSizeMB, maxIndexSizeMB } = data
            maxFileSize = maxFileSizeMB * MB_TO_BYTES
            remainingIndexSize = maxIndexSizeMB * MB_TO_BYTES
            port.postMessage({ type: 'ready' })
        } else if (type === 'processBatch') {
            // Without limits every size comparison below is false, which would silently
            // classify every file as oversize. Report the protocol violation instead.
            if (maxFileSize === undefined || remainingIndexSize === undefined) {
                throw new Error("Received 'processBatch' before 'init'")
            }

            const { files, fileExtensions } = data
            if (!Array.isArray(fileExtensions)) {
                throw new TypeError("'processBatch' requires a fileExtensions array")
            }
            // Built once per batch so the per-file lookup does not rescan the list.
            const allowedExtensions = new Set(fileExtensions)

            for (const file of files) {
                const fileExtName = '.' + getFileExtensionName(file)
                if (uniqueFiles.has(file) || !allowedExtensions.has(fileExtName)) {
                    continue
                }

                let fileSize
                try {
                    fileSize = fs.statSync(file).size
                } catch (error) {
                    // Skip files that can't be accessed
                    continue
                }

                if (fileSize >= maxFileSize) {
                    filesExceedingMaxSize++
                    continue
                }
                if (remainingIndexSize <= fileSize) {
                    // Budget exhausted: hand back what was collected and stop this batch.
                    postResult(port, true)
                    return
                }
                uniqueFiles.add(file)
                remainingIndexSize -= fileSize
            }

            port.postMessage({ type: 'batchComplete' })
        } else if (type === 'complete') {
            postResult(port, false)
        } else {
            port.postMessage({
                type: 'error',
                error: `Unknown message type: ${type}`,
            })
        }
    } catch (error) {
        port.postMessage({
            type: 'error',
            error: error instanceof Error ? error.message : String(error),
        })
    }
}

// `parentPort` is null when this module is not running as a worker thread, so only
// subscribe when there is a parent to talk to.
if (parentPort) {
    parentPort.on('message', message => handleMessage(message, parentPort))
}
63143f1c75..4ff7ad779a 100644 --- a/server/aws-lsp-codewhisperer/src/shared/localProjectContextController.ts +++ b/server/aws-lsp-codewhisperer/src/shared/localProjectContextController.ts @@ -404,10 +404,119 @@ export class LocalProjectContextController { } this.log.info(`Processing ${workspaceFolders.length} workspace folders...`) + const startTime = Date.now() maxFileSizeMB = Math.min(maxFileSizeMB ?? Infinity, this.DEFAULT_MAX_FILE_SIZE_MB) maxIndexSizeMB = Math.min(maxIndexSizeMB ?? Infinity, this.DEFAULT_MAX_INDEX_SIZE_MB) + try { + const { Worker } = await import('worker_threads') + const workerPath = path.join(__dirname, 'fileProcessingWorker.js') + + if (!fs.existsSync(workerPath)) { + throw new Error(`Worker file not found: ${workerPath}`) + } + + this.log.info(`Processing ${workspaceFolders.length} workspace folders in worker thread`) + const worker = new Worker(workerPath) + + return await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + void worker.terminate() + reject(new Error('Worker timeout after 5 minutes')) + }, 300_000) + + let batchesInProgress = 0 + + worker.on('message', msg => { + if (msg.type === 'ready') { + // Worker initialized, start sending batches + sendBatches().catch(reject) + } else if (msg.type === 'batchComplete') { + batchesInProgress-- + } else if (msg.type === 'result') { + clearTimeout(timeout) + void worker.terminate() + const { files, filesExceedingMaxSize, reachedLimit } = msg.data + const duration = Date.now() - startTime + if (reachedLimit) { + this.log.info( + `Reaching max file collection size limit ${maxIndexSizeMB} MB. ${files.length} files found. ${filesExceedingMaxSize} files exceeded ${maxFileSizeMB} MB (took ${duration}ms)` + ) + } else { + this.log.info( + `ProcessWorkspaceFolders complete. ${files.length} files found. 
${filesExceedingMaxSize} files exceeded ${maxFileSizeMB} MB (took ${duration}ms using worker thread)` + ) + } + resolve(files) + } else if (msg.type === 'error') { + clearTimeout(timeout) + void worker.terminate() + reject(new Error(msg.error)) + } + }) + + worker.on('error', err => { + clearTimeout(timeout) + void worker.terminate() + reject(err) + }) + + async function sendBatches() { + const BATCH_SIZE = 10000 + + for (const folder of workspaceFolders!) { + const folderPath = path.resolve(URI.parse(folder.uri).fsPath) + const filesInFolder = await listFilesWithGitignore(folderPath) + + for (let i = 0; i < filesInFolder.length; i += BATCH_SIZE) { + const batch = filesInFolder.slice(i, i + BATCH_SIZE) + batchesInProgress++ + worker.postMessage({ + type: 'processBatch', + data: { files: batch, fileExtensions }, + }) + + // Wait if too many batches in progress + while (batchesInProgress > 5) { + await sleep(10) + } + } + } + + // Wait for all batches to complete + while (batchesInProgress > 0) { + await sleep(10) + } + + worker.postMessage({ type: 'complete' }) + } + + worker.postMessage({ + type: 'init', + data: { maxFileSizeMB, maxIndexSizeMB }, + }) + }) + } catch (error) { + this.log.warn(`Worker thread failed, falling back to main thread: ${error}`) + const result = await this.processWorkspaceFoldersFallback( + workspaceFolders, + maxFileSizeMB, + maxIndexSizeMB, + fileExtensions + ) + const duration = Date.now() - startTime + this.log.info(`Processing completed in ${duration}ms (fallback)`) + return result + } + } + + private async processWorkspaceFoldersFallback( + workspaceFolders: WorkspaceFolder[], + maxFileSizeMB: number, + maxIndexSizeMB: number, + fileExtensions?: string[] + ): Promise { const sizeConstraints: SizeConstraints = { maxFileSize: maxFileSizeMB * this.MB_TO_BYTES, remainingIndexSize: maxIndexSizeMB * this.MB_TO_BYTES, @@ -429,7 +538,7 @@ export class LocalProjectContextController { sizeConstraints.remainingIndexSize = 
sizeConstraints.remainingIndexSize - fileSize } else { this.log.info( - `Reaching max file collection size limit ${this.maxIndexSizeMB} MB. ${uniqueFilesToIndex.size} files found. ${filesExceedingMaxSize} files exceeded ${maxFileSizeMB} MB ` + `Reaching max file collection size limit ${maxIndexSizeMB} MB. ${uniqueFilesToIndex.size} files found. ${filesExceedingMaxSize} files exceeded ${maxFileSizeMB} MB ` ) return [...uniqueFilesToIndex] } @@ -446,7 +555,7 @@ export class LocalProjectContextController { } this.log.info( - `ProcessWorkspaceFolders complete. ${uniqueFilesToIndex.size} files found. ${filesExceedingMaxSize} files exceeded ${maxFileSizeMB} MB` + `ProcessWorkspaceFolders complete. ${uniqueFilesToIndex.size} files found. ${filesExceedingMaxSize} files exceeded ${maxFileSizeMB} MB (fallback)` ) return [...uniqueFilesToIndex] }