Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 243 additions & 0 deletions src/services/code-index/__tests__/manager.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ import { CodeIndexManager } from "../manager"
import { CodeIndexServiceFactory } from "../service-factory"
import type { MockedClass } from "vitest"
import * as path from "path"
import * as fs from "fs/promises"
import ignore from "ignore"

// Mock fs/promises module
vi.mock("fs/promises")

// Mock ignore module
vi.mock("ignore")

// Mock vscode module
vi.mock("vscode", () => {
Expand Down Expand Up @@ -581,4 +589,239 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
consoleErrorSpy.mockRestore()
})
})

describe("gitignore pattern handling", () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test coverage is comprehensive! Could we also add an edge case test for .gitignore files with extremely long lines or unusual encoding? This would ensure our solution is robust against all possible user inputs.

let mockIgnoreInstance: any
let mockConfigManager: any
let mockCacheManager: any
let mockServiceFactoryInstance: any

beforeEach(() => {
// Reset mocks
vi.clearAllMocks()

// Mock ignore instance
mockIgnoreInstance = {
add: vi.fn(),
ignores: vi.fn(() => false),
}

// Mock the ignore module to return our mock instance
vi.mocked(ignore).mockReturnValue(mockIgnoreInstance)

// Mock config manager
mockConfigManager = {
loadConfiguration: vi.fn().mockResolvedValue({ requiresRestart: false }),
isFeatureConfigured: true,
isFeatureEnabled: true,
getConfig: vi.fn().mockReturnValue({
isConfigured: true,
embedderProvider: "openai",
modelId: "text-embedding-3-small",
openAiOptions: { openAiNativeApiKey: "test-key" },
qdrantUrl: "http://localhost:6333",
qdrantApiKey: "test-key",
searchMinScore: 0.4,
}),
}
;(manager as any)._configManager = mockConfigManager

// Mock cache manager
mockCacheManager = {
initialize: vi.fn(),
clearCacheFile: vi.fn(),
}
;(manager as any)._cacheManager = mockCacheManager

// Mock service factory
mockServiceFactoryInstance = {
createServices: vi.fn().mockReturnValue({
embedder: { embedderInfo: { name: "openai" } },
vectorStore: {},
scanner: {},
fileWatcher: {
onDidStartBatchProcessing: vi.fn(),
onBatchProgressUpdate: vi.fn(),
watch: vi.fn(),
stopWatcher: vi.fn(),
dispose: vi.fn(),
},
}),
validateEmbedder: vi.fn().mockResolvedValue({ valid: true }),
}
MockedCodeIndexServiceFactory.mockImplementation(() => mockServiceFactoryInstance as any)
})

it("should handle invalid gitignore patterns gracefully", async () => {
// Arrange - Mock .gitignore with invalid pattern
const invalidGitignoreContent = `
# Valid patterns
node_modules/
*.log

# Invalid pattern - character range out of order
pqh[A-/]

# More valid patterns
dist/
.env
`
;(fs.readFile as any).mockResolvedValue(invalidGitignoreContent)

// Make the first add() call throw an error (simulating invalid pattern)
let addCallCount = 0
mockIgnoreInstance.add.mockImplementation((pattern: string) => {
addCallCount++
// Throw on first call (full content), succeed on individual patterns
if (addCallCount === 1) {
throw new Error(
"Invalid regular expression: /^pqh[A-\\/](?=$|\\/$)/i: Range out of order in character class",
)
}
// Throw on the specific invalid pattern
if (pattern.includes("pqh[A-/]")) {
throw new Error(
"Invalid regular expression: /^pqh[A-\\/](?=$|\\/$)/i: Range out of order in character class",
)
}
})

// Spy on console methods
const consoleWarnSpy = vi.spyOn(console, "warn").mockImplementation(() => {})

// Act
await (manager as any)._recreateServices()

// Assert - Should have logged warnings
expect(consoleWarnSpy).toHaveBeenCalledWith(
expect.stringContaining("Warning: .gitignore contains invalid patterns"),
)
expect(consoleWarnSpy).toHaveBeenCalledWith(
expect.stringContaining('Skipping invalid .gitignore pattern: "pqh[A-/]"'),
)

// Should have attempted to add valid patterns individually
expect(mockIgnoreInstance.add).toHaveBeenCalled()

// Should not throw an error - service creation should continue
expect(mockServiceFactoryInstance.createServices).toHaveBeenCalled()
expect(mockServiceFactoryInstance.validateEmbedder).toHaveBeenCalled()

// Cleanup
consoleWarnSpy.mockRestore()
})

it("should process valid gitignore patterns normally", async () => {
// Arrange - Mock .gitignore with all valid patterns
const validGitignoreContent = `
# Valid patterns
node_modules/
*.log
dist/
.env
`
;(fs.readFile as any).mockResolvedValue(validGitignoreContent)

// All add() calls succeed
mockIgnoreInstance.add.mockImplementation(() => {})

// Spy on console methods
const consoleWarnSpy = vi.spyOn(console, "warn").mockImplementation(() => {})

// Act
await (manager as any)._recreateServices()

// Assert - Should not have logged any warnings
expect(consoleWarnSpy).not.toHaveBeenCalled()

// Should have added the content and .gitignore itself
expect(mockIgnoreInstance.add).toHaveBeenCalledWith(validGitignoreContent)
expect(mockIgnoreInstance.add).toHaveBeenCalledWith(".gitignore")

// Service creation should proceed normally
expect(mockServiceFactoryInstance.createServices).toHaveBeenCalled()
expect(mockServiceFactoryInstance.validateEmbedder).toHaveBeenCalled()

// Cleanup
consoleWarnSpy.mockRestore()
})

it("should handle missing .gitignore file gracefully", async () => {
// Arrange - Mock file not found error
;(fs.readFile as any).mockRejectedValue(new Error("ENOENT: no such file or directory"))

// Spy on console methods
const consoleInfoSpy = vi.spyOn(console, "info").mockImplementation(() => {})

// Act
await (manager as any)._recreateServices()

// Assert - Should log info message
expect(consoleInfoSpy).toHaveBeenCalledWith(
".gitignore file not found or could not be read, proceeding without gitignore patterns",
)

// Should not attempt to add patterns
expect(mockIgnoreInstance.add).not.toHaveBeenCalled()

// Service creation should proceed normally
expect(mockServiceFactoryInstance.createServices).toHaveBeenCalled()
expect(mockServiceFactoryInstance.validateEmbedder).toHaveBeenCalled()

// Cleanup
consoleInfoSpy.mockRestore()
})

it("should handle mixed valid and invalid patterns", async () => {
// Arrange - Mock .gitignore with mix of valid and invalid patterns
const mixedGitignoreContent = `
node_modules/
pqh[A-/]
*.log
[Z-A]invalid
dist/
`
;(fs.readFile as any).mockResolvedValue(mixedGitignoreContent)

// Make add() throw on invalid patterns
mockIgnoreInstance.add.mockImplementation((pattern: string) => {
if (pattern === mixedGitignoreContent) {
throw new Error("Invalid patterns detected")
}
if (pattern.includes("[A-/]") || pattern.includes("[Z-A]")) {
throw new Error("Invalid character range")
}
})

// Spy on console methods
const consoleWarnSpy = vi.spyOn(console, "warn").mockImplementation(() => {})

// Act
await (manager as any)._recreateServices()

// Assert - Should have logged warnings for invalid patterns
expect(consoleWarnSpy).toHaveBeenCalledWith(
expect.stringContaining("Warning: .gitignore contains invalid patterns"),
)
expect(consoleWarnSpy).toHaveBeenCalledWith(
expect.stringContaining('Skipping invalid .gitignore pattern: "pqh[A-/]"'),
)
expect(consoleWarnSpy).toHaveBeenCalledWith(
expect.stringContaining('Skipping invalid .gitignore pattern: "[Z-A]invalid"'),
)

// Should have attempted to add valid patterns
expect(mockIgnoreInstance.add).toHaveBeenCalledWith("node_modules/")
expect(mockIgnoreInstance.add).toHaveBeenCalledWith("*.log")
expect(mockIgnoreInstance.add).toHaveBeenCalledWith("dist/")
expect(mockIgnoreInstance.add).toHaveBeenCalledWith(".gitignore")

// Service creation should proceed normally
expect(mockServiceFactoryInstance.createServices).toHaveBeenCalled()
expect(mockServiceFactoryInstance.validateEmbedder).toHaveBeenCalled()

// Cleanup
consoleWarnSpy.mockRestore()
})
})
})
51 changes: 42 additions & 9 deletions src/services/code-index/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -315,16 +315,49 @@ export class CodeIndexManager {
const ignorePath = path.join(workspacePath, ".gitignore")
try {
const content = await fs.readFile(ignorePath, "utf8")
ignoreInstance.add(content)
ignoreInstance.add(".gitignore")

// Try to add the gitignore patterns, but handle invalid regex patterns gracefully
try {
ignoreInstance.add(content)
ignoreInstance.add(".gitignore")
} catch (ignoreError) {
// Log warning about invalid patterns but continue with indexing
console.warn(
`Warning: .gitignore contains invalid patterns that could not be parsed. Some files may not be properly ignored during indexing. Error: ${
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning message could be more actionable. Is it intentional that we don't parse out which specific pattern caused the initial bulk parsing to fail? It might help users fix their .gitignore file more easily if we could identify the problematic pattern in the first error message.

ignoreError instanceof Error ? ignoreError.message : String(ignoreError)
}`,
)

// Try to add individual lines to identify and skip problematic patterns
const lines = content.split("\n")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be cleaner to extract this line-by-line parsing logic into a separate helper method? Something like:

This would improve readability and make the method easier to test in isolation.

for (const line of lines) {
const trimmedLine = line.trim()
// Skip empty lines and comments
if (!trimmedLine || trimmedLine.startsWith("#")) {
continue
}

try {
// Create a new ignore instance to test each pattern
const testIgnore = ignore()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Creating a new ignore instance for each pattern validation could be inefficient for large .gitignore files. Could we consider reusing a single test instance or implementing a more efficient validation approach?

testIgnore.add(trimmedLine)
// If successful, add to the main instance
ignoreInstance.add(trimmedLine)
} catch (lineError) {
console.warn(`Skipping invalid .gitignore pattern: "${trimmedLine}"`)
}
}

// Always add .gitignore itself to the ignore list
try {
ignoreInstance.add(".gitignore")
} catch {
// Even this basic pattern failed, but continue anyway
}
}
} catch (error) {
// Should never happen: reading file failed even though it exists
console.error("Unexpected error loading .gitignore:", error)
TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
location: "_recreateServices",
})
// File reading error - .gitignore might not exist or be inaccessible
console.info(".gitignore file not found or could not be read, proceeding without gitignore patterns")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor inconsistency: we're using here but for warnings above. Should we consider using a consistent logging approach or perhaps the VSCode output channel for better user visibility?

}

// (Re)Create shared service instances
Expand Down
Loading