Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 105 additions & 18 deletions src/services/glob/list-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,22 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb
// Get ripgrep path
const rgPath = await getRipgrepPath()

// Get files using ripgrep
const files = await listFilesWithRipgrep(rgPath, dirPath, recursive, limit)

// Get directories with proper filtering
// Get directories with proper filtering first to ensure we capture the structure
const gitignorePatterns = await parseGitignoreFile(dirPath, recursive)
const directories = await listFilteredDirectories(dirPath, recursive, gitignorePatterns)

// Get files using ripgrep with a higher limit to avoid early termination
// We'll use a higher internal limit and then apply balanced sampling
const internalLimit = Math.max(limit * 3, 1000) // Use 3x the requested limit or 1000, whichever is higher
const files = await listFilesWithRipgrep(rgPath, dirPath, recursive, internalLimit)

// Apply balanced sampling to ensure fair representation across directories
// Reserve some space for directories in the limit
const filesLimit = Math.max(limit - directories.length, Math.floor(limit * 0.8))
const balancedFiles = applyBalancedSampling(files, directories, filesLimit)

// Combine and format the results
return formatAndCombineResults(files, directories, limit)
return formatAndCombineResults(balancedFiles, directories, limit)
}

/**
Expand Down Expand Up @@ -304,6 +311,91 @@ function isIgnoredByGitignore(dirName: string, gitignorePatterns: string[]): boo
return false
}

/**
 * Pick at most `limit` files so that every directory is fairly represented,
 * preventing one large directory from dominating the file list.
 *
 * Allocation happens in three passes over the directories (sorted by name):
 *   1. every directory receives a small guaranteed minimum,
 *   2. the slots left after pass 1 are shared proportionally to directory size,
 *   3. whatever rounding left over is handed out one slot at a time, round-robin.
 *
 * Within a directory the lexicographically first files are the ones kept.
 *
 * @param files - File paths to sample from; grouped by `path.dirname`.
 * @param directories - Not consulted by the sampling; kept so existing callers
 *   keep compiling.
 * @param limit - Maximum number of files to return.
 * @returns `files` itself when it already fits within `limit`; otherwise a new
 *   array of exactly `Math.floor(limit)` paths (empty when that is not positive),
 *   ordered by directory name and then by file name.
 */
function applyBalancedSampling(files: string[], directories: string[], limit: number): string[] {
	// Nothing to trim: hand the caller's array back untouched.
	if (files.length <= limit) {
		return files
	}

	// Work with a whole, positive budget. A negative value would otherwise reach
	// `slice(0, negative)` below and return files instead of none.
	const budget = Math.floor(limit)
	if (!(budget > 0)) {
		return []
	}

	// Group files by their parent directory.
	const filesByDirectory = new Map<string, string[]>()
	for (const file of files) {
		const dir = path.dirname(file)
		const group = filesByDirectory.get(dir)
		if (group) {
			group.push(file)
		} else {
			filesByDirectory.set(dir, [file])
		}
	}

	// Deterministic order: directories by name, files within a directory sorted.
	const buckets = Array.from(filesByDirectory.entries())
		.sort(([a], [b]) => a.localeCompare(b))
		.map(([, dirFiles]) => ({ files: dirFiles.sort(), allocation: 0 }))

	const dirCount = buckets.length
	const fairShare = Math.floor(budget / dirCount)

	// Guaranteed minimum: 3 files or a quarter of the fair share, whichever is
	// larger, but never more than the fair share itself. Without that cap the
	// first few directories would consume a small budget and starve the rest.
	const minFilesPerDir = Math.min(fairShare, Math.max(3, Math.floor(budget / (dirCount * 4))))
	// Soft ceiling for the proportional pass so a huge directory cannot absorb it all.
	const maxFilesPerDir = fairShare + 10

	// First pass: give each directory its minimum allocation.
	let remaining = budget
	for (const bucket of buckets) {
		bucket.allocation = Math.min(minFilesPerDir, bucket.files.length, remaining)
		remaining -= bucket.allocation
	}

	// Second pass: share the slots left after pass 1 proportionally to directory
	// size. The pool is captured once; computing shares from the shrinking
	// `remaining` would short-change directories that come later in the order.
	if (remaining > 0) {
		const pool = remaining
		const totalFiles = files.length

		for (const bucket of buckets) {
			const extra = Math.min(
				Math.floor((pool * bucket.files.length) / totalFiles),
				maxFilesPerDir - bucket.allocation,
				bucket.files.length - bucket.allocation,
				remaining,
			)

			if (extra > 0) {
				bucket.allocation += extra
				remaining -= extra
			}
		}
	}

	// Third pass: rounding and the ceiling can leave slots unassigned. Hand them
	// out one at a time, round-robin, so the surplus is spread across directories
	// rather than all going to the alphabetically first one.
	while (remaining > 0) {
		let progressed = false

		for (const bucket of buckets) {
			if (remaining <= 0) break

			if (bucket.allocation < bucket.files.length) {
				bucket.allocation += 1
				remaining -= 1
				progressed = true
			}
		}

		// Defensive: stop if no directory has spare files left.
		if (!progressed) break
	}

	// Collect the files based on allocations.
	const result: string[] = []
	for (const bucket of buckets) {
		result.push(...bucket.files.slice(0, bucket.allocation))
	}

	return result
}

/**
* Combine file and directory results and format them properly
*/
Expand Down Expand Up @@ -338,23 +430,20 @@ async function execRipgrep(rgPath: string, args: string[], limit: number): Promi
let output = ""
let results: string[] = []

// Set timeout to avoid hanging
// Set timeout to avoid hanging - increased to allow more complete traversal
const timeoutId = setTimeout(() => {
rgProcess.kill()
console.warn("ripgrep timed out, returning partial results")
resolve(results.slice(0, limit))
}, 10_000)
resolve(results) // Don't slice here either
}, 15_000)

// Process stdout data as it comes in
rgProcess.stdout.on("data", (data) => {
output += data.toString()
processRipgrepOutput()

// Kill the process if we've reached the limit
if (results.length >= limit) {
rgProcess.kill()
clearTimeout(timeoutId) // Clear the timeout when we kill the process due to reaching the limit
}
// Don't kill the process early - let it complete to get full directory structure
// The balanced sampling will be applied later in applyBalancedSampling
})

// Process stderr but don't fail on non-zero exit codes
Expand All @@ -375,7 +464,7 @@ async function execRipgrep(rgPath: string, args: string[], limit: number): Promi
console.warn(`ripgrep process exited with code ${code}, returning partial results`)
}

resolve(results.slice(0, limit))
resolve(results) // Don't slice here - let balanced sampling handle the limit
})

// Handle process errors
Expand All @@ -396,12 +485,10 @@ async function execRipgrep(rgPath: string, args: string[], limit: number): Promi
output = ""
}

// Process each complete line
// Process each complete line - don't limit here, let balanced sampling handle it
for (const line of lines) {
if (line.trim() && results.length < limit) {
if (line.trim()) {
results.push(line)
} else if (results.length >= limit) {
break
}
}
}
Expand Down
1 change: 1 addition & 0 deletions test-issue-5301/a/file001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 1
1 change: 1 addition & 0 deletions test-issue-5301/a/file002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 2
1 change: 1 addition & 0 deletions test-issue-5301/a/file003.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 3
1 change: 1 addition & 0 deletions test-issue-5301/a/file004.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 4
1 change: 1 addition & 0 deletions test-issue-5301/a/file005.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 5
1 change: 1 addition & 0 deletions test-issue-5301/a/file006.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 6
1 change: 1 addition & 0 deletions test-issue-5301/a/file007.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 7
1 change: 1 addition & 0 deletions test-issue-5301/a/file008.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 8
1 change: 1 addition & 0 deletions test-issue-5301/a/file009.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 9
1 change: 1 addition & 0 deletions test-issue-5301/a/file010.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 10
1 change: 1 addition & 0 deletions test-issue-5301/a/file011.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 11
1 change: 1 addition & 0 deletions test-issue-5301/a/file012.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 12
1 change: 1 addition & 0 deletions test-issue-5301/a/file013.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 13
1 change: 1 addition & 0 deletions test-issue-5301/a/file014.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 14
1 change: 1 addition & 0 deletions test-issue-5301/a/file015.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 15
1 change: 1 addition & 0 deletions test-issue-5301/a/file016.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 16
1 change: 1 addition & 0 deletions test-issue-5301/a/file017.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 17
1 change: 1 addition & 0 deletions test-issue-5301/a/file018.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 18
1 change: 1 addition & 0 deletions test-issue-5301/a/file019.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 19
1 change: 1 addition & 0 deletions test-issue-5301/a/file020.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 20
1 change: 1 addition & 0 deletions test-issue-5301/a/file021.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 21
1 change: 1 addition & 0 deletions test-issue-5301/a/file022.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 22
1 change: 1 addition & 0 deletions test-issue-5301/a/file023.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 23
1 change: 1 addition & 0 deletions test-issue-5301/a/file024.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 24
1 change: 1 addition & 0 deletions test-issue-5301/a/file025.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 25
1 change: 1 addition & 0 deletions test-issue-5301/a/file026.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 26
1 change: 1 addition & 0 deletions test-issue-5301/a/file027.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 27
1 change: 1 addition & 0 deletions test-issue-5301/a/file028.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 28
1 change: 1 addition & 0 deletions test-issue-5301/a/file029.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 29
1 change: 1 addition & 0 deletions test-issue-5301/a/file030.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 30
1 change: 1 addition & 0 deletions test-issue-5301/a/file031.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 31
1 change: 1 addition & 0 deletions test-issue-5301/a/file032.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 32
1 change: 1 addition & 0 deletions test-issue-5301/a/file033.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 33
1 change: 1 addition & 0 deletions test-issue-5301/a/file034.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 34
1 change: 1 addition & 0 deletions test-issue-5301/a/file035.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 35
1 change: 1 addition & 0 deletions test-issue-5301/a/file036.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 36
1 change: 1 addition & 0 deletions test-issue-5301/a/file037.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 37
1 change: 1 addition & 0 deletions test-issue-5301/a/file038.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 38
1 change: 1 addition & 0 deletions test-issue-5301/a/file039.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 39
1 change: 1 addition & 0 deletions test-issue-5301/a/file040.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 40
1 change: 1 addition & 0 deletions test-issue-5301/a/file041.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 41
1 change: 1 addition & 0 deletions test-issue-5301/a/file042.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 42
1 change: 1 addition & 0 deletions test-issue-5301/a/file043.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 43
1 change: 1 addition & 0 deletions test-issue-5301/a/file044.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 44
1 change: 1 addition & 0 deletions test-issue-5301/a/file045.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 45
1 change: 1 addition & 0 deletions test-issue-5301/a/file046.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 46
1 change: 1 addition & 0 deletions test-issue-5301/a/file047.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 47
1 change: 1 addition & 0 deletions test-issue-5301/a/file048.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 48
1 change: 1 addition & 0 deletions test-issue-5301/a/file049.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 49
1 change: 1 addition & 0 deletions test-issue-5301/a/file050.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 50
1 change: 1 addition & 0 deletions test-issue-5301/a/file051.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 51
1 change: 1 addition & 0 deletions test-issue-5301/a/file052.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 52
1 change: 1 addition & 0 deletions test-issue-5301/a/file053.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 53
1 change: 1 addition & 0 deletions test-issue-5301/a/file054.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 54
1 change: 1 addition & 0 deletions test-issue-5301/a/file055.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 55
1 change: 1 addition & 0 deletions test-issue-5301/a/file056.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 56
1 change: 1 addition & 0 deletions test-issue-5301/a/file057.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 57
1 change: 1 addition & 0 deletions test-issue-5301/a/file058.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 58
1 change: 1 addition & 0 deletions test-issue-5301/a/file059.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 59
1 change: 1 addition & 0 deletions test-issue-5301/a/file060.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 60
1 change: 1 addition & 0 deletions test-issue-5301/a/file061.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 61
1 change: 1 addition & 0 deletions test-issue-5301/a/file062.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 62
1 change: 1 addition & 0 deletions test-issue-5301/a/file063.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 63
1 change: 1 addition & 0 deletions test-issue-5301/a/file064.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 64
1 change: 1 addition & 0 deletions test-issue-5301/a/file065.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 65
1 change: 1 addition & 0 deletions test-issue-5301/a/file066.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 66
1 change: 1 addition & 0 deletions test-issue-5301/a/file067.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 67
1 change: 1 addition & 0 deletions test-issue-5301/a/file068.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 68
1 change: 1 addition & 0 deletions test-issue-5301/a/file069.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 69
1 change: 1 addition & 0 deletions test-issue-5301/a/file070.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 70
1 change: 1 addition & 0 deletions test-issue-5301/a/file071.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 71
1 change: 1 addition & 0 deletions test-issue-5301/a/file072.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 72
1 change: 1 addition & 0 deletions test-issue-5301/a/file073.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 73
1 change: 1 addition & 0 deletions test-issue-5301/a/file074.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 74
1 change: 1 addition & 0 deletions test-issue-5301/a/file075.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Content of file 75
Loading
Loading