Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 136 additions & 54 deletions src/services/checkpoints/ShadowCheckpointService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import * as vscode from "vscode"

import { fileExistsAtPath } from "../../utils/fs"
import { executeRipgrep } from "../../services/search/file-search"
import { t } from "../../i18n"

import { CheckpointDiff, CheckpointResult, CheckpointEventMap } from "./types"
import { getExcludePatterns } from "./excludes"
Expand Down Expand Up @@ -70,20 +69,6 @@ export abstract class ShadowCheckpointService extends EventEmitter {
throw new Error("Shadow git repo already initialized")
}

const nestedGitPath = await this.getNestedGitRepository()

if (nestedGitPath) {
// Show persistent error message with the offending path
const relativePath = path.relative(this.workspaceDir, nestedGitPath)
const message = t("common:errors.nested_git_repos_warning", { path: relativePath })
vscode.window.showErrorMessage(message)

throw new Error(
`Checkpoints are disabled because a nested git repository was detected at: ${relativePath}. ` +
"Please remove or relocate nested git repositories to use the checkpoints feature.",
)
}

await fs.mkdir(this.checkpointsDir, { recursive: true })
const git = simpleGit(this.checkpointsDir)
const gitVersion = await git.version()
Expand Down Expand Up @@ -152,63 +137,160 @@ export abstract class ShadowCheckpointService extends EventEmitter {

/**
 * Stages all workspace changes into the shadow repository's index while
 * keeping nested git repositories out of it.
 *
 * Steps:
 *  1. Ask `findNestedRepos` for the nested repository paths.
 *  2. Remove any gitlink already cached for those paths.
 *  3. Run `git add -A :/` with one `:(exclude)` pathspec per nested repo.
 *  4. Verify that no mode-160000 (gitlink) entry is left in the index.
 *
 * @param git simple-git handle used to run the git commands.
 * @throws Rethrows any failure from the staging commands, and throws when a
 *         gitlink entry is still present in the index after staging.
 */
private async stageAll(git: SimpleGit) {
	try {
		// Pathspecs always use forward slashes; drop trailing slashes so that
		// appending "/" below never produces "dir//", and dedupe the result.
		const toPathspec = (p: string) => (path.sep === "\\" ? p.replace(/\\/g, "/") : p).replace(/\/+$/, "")
		const nestedRepos = [...new Set((await this.findNestedRepos(git)).map(toPathspec))].filter(
			(p) => p !== "" && p !== ".",
		)

		if (nestedRepos.length > 0) {
			this.log(
				`[${this.constructor.name}#stageAll] excluding ${nestedRepos.length} nested repos: ${nestedRepos.join(", ")}`,
			)
		}

		// Remove any existing gitlinks from the index before staging. This is
		// best-effort: a failure here is logged and the safety check below
		// still catches any gitlink that survives.
		for (const repoPath of nestedRepos) {
			try {
				// "--" keeps a path that starts with "-" from being read as an option.
				await git.raw(["rm", "--cached", "--ignore-unmatch", "-r", "--", repoPath])
			} catch (error) {
				this.log(
					`[${this.constructor.name}#stageAll] failed to remove cached gitlink ${repoPath}: ${error instanceof Error ? error.message : String(error)}`,
				)
			}
		}

		// Stage everything from the repository root (":/") except the nested repos.
		const addArgs: string[] = ["-A", ":/", ...nestedRepos.map((repoPath) => `:(exclude)${repoPath}/`)]
		await git.add(addArgs)

		// Safety check: verify no mode 160000 entries are in the staging area.
		// With -z each entry is "<mode> <object> <stage>\t<path>" terminated by
		// NUL and the path is never quoted, so paths containing spaces or
		// non-ASCII characters are reported intact.
		const stagedEntries = await git.raw(["ls-files", "-s", "-z", "--cached"])
		const gitlinkEntries = stagedEntries
			.split("\0")
			.filter((entry) => entry.startsWith("160000 "))
			.map((entry) => {
				const tabIndex = entry.indexOf("\t")
				return tabIndex === -1 ? "" : entry.slice(tabIndex + 1)
			})
			.filter(Boolean)

		if (gitlinkEntries.length > 0) {
			throw new Error(
				`Gitlink entries detected in staging area: ${gitlinkEntries.join(", ")} - this should not happen`,
			)
		}

		// Additional check for .gitmodules changes. Match whole path components
		// so that names such as "foo.gitmodules.bak" do not trigger the warning.
		const diffSummary = await git.raw(["diff", "--cached", "--name-only"])
		const touchesGitmodules = diffSummary
			.split(/\r?\n/)
			.some((name) => name === ".gitmodules" || name.endsWith("/.gitmodules"))

		if (touchesGitmodules) {
			this.log(`[${this.constructor.name}#stageAll] warning: .gitmodules changes detected in staging area`)
		}
	} catch (error) {
		this.log(
			`[${this.constructor.name}#stageAll] failed to add files to git: ${error instanceof Error ? error.message : String(error)}`,
		)
		throw error
	}
}

/**
 * Collects the paths of git repositories nested inside the workspace so that
 * callers can keep them out of the shadow repository's index.
 *
 * Four sources are consulted, each independently and on a best-effort basis
 * (a failure in one source is logged and the others still run):
 *  1. `submodule.*.path` entries declared in `.gitmodules`.
 *  2. Gitlink entries (mode 160000) already present in the index.
 *  3. Nested `.git/HEAD` files found on disk by ripgrep.
 *  4. Nested `.git` *files* whose content starts with `gitdir:`.
 *
 * @param git simple-git handle used to query git config and the index.
 * @returns De-duplicated repository paths using forward slashes and no
 *          trailing slash; empty paths and "." are never returned.
 */
private async findNestedRepos(git: SimpleGit): Promise<string[]> {
	const nestedRepos = new Set<string>()

	// Git reports paths with forward slashes while filesystem results on
	// Windows use backslashes; unify them so the Set dedupes across sources.
	const toPosix = (p: string) => (path.sep === "\\" ? p.replace(/\\/g, "/") : p)
	const addRepo = (repoPath: string) => {
		const normalized = toPosix(repoPath).replace(/\/+$/, "")
		if (normalized !== "" && normalized !== ".") nestedRepos.add(normalized)
	}

	// 1. From .gitmodules (declared submodules)
	try {
		const config = await git.raw(["config", "-f", ".gitmodules", "--get-regexp", "^submodule\\..*\\.path$"])
		for (const line of config.split("\n")) {
			const match = line.match(/submodule\..*\.path\s+(.+)/)
			if (match) addRepo(match[1])
		}
	} catch (error) {
		// No .gitmodules file is expected in most cases, only log if it's a real error
		if (error instanceof Error && !error.message.includes("exit code 1")) {
			this.log(
				`[${this.constructor.name}#findNestedRepos] warning: failed to read .gitmodules: ${error.message}`,
			)
		}
	}

	// 2. From index (gitlinks with mode 160000)
	try {
		// With -z each entry is "<mode> <object> <stage>\t<path>" terminated by
		// NUL and the path is never quoted, so paths containing spaces or
		// non-ASCII characters are read intact.
		const lsFiles = await git.raw(["ls-files", "-s", "-z"])
		for (const entry of lsFiles.split("\0")) {
			if (!entry.startsWith("160000 ")) continue
			const tabIndex = entry.indexOf("\t")
			if (tabIndex !== -1) addRepo(entry.slice(tabIndex + 1))
		}
	} catch (error) {
		this.log(
			`[${this.constructor.name}#findNestedRepos] warning: failed to list files from index: ${error instanceof Error ? error.message : String(error)}`,
		)
	}

	// 3. From filesystem (any nested .git directory)
	try {
		const gitDirs = await executeRipgrep({
			args: ["--files", "--hidden", "--follow", "-g", "**/.git/HEAD", this.workspaceDir],
			workspacePath: this.workspaceDir,
		})

		for (const result of gitDirs) {
			if (result.type !== "file") continue

			const relPath = toPosix(result.path)
			// Requiring a leading "/" skips the workspace's own ".git/HEAD".
			if (relPath.endsWith("/.git/HEAD")) {
				// Strip "HEAD", then ".git", to get the repository directory.
				addRepo(path.posix.dirname(path.posix.dirname(relPath)))
			}
		}
	} catch (error) {
		this.log(
			`[${this.constructor.name}#findNestedRepos] failed to search filesystem: ${error instanceof Error ? error.message : String(error)}`,
		)
	}

	// 4. From filesystem (git worktrees - .git files pointing to worktree)
	try {
		const gitFiles = await executeRipgrep({
			args: ["--files", "--hidden", "--follow", "-g", "**/.git", this.workspaceDir],
			workspacePath: this.workspaceDir,
		})

		for (const result of gitFiles) {
			if (result.type !== "file") continue

			const relPath = toPosix(result.path)
			// Only nested .git files qualify; the root ".git" has no leading "/".
			if (!relPath.endsWith("/.git")) continue

			try {
				// A .git file starting with "gitdir:" redirects to a git directory
				// stored elsewhere, so its parent directory is a repository.
				const gitFilePath = path.join(this.workspaceDir, result.path)
				const content = await fs.readFile(gitFilePath, "utf8")
				if (content.trim().startsWith("gitdir:")) {
					addRepo(path.posix.dirname(relPath))
				}
			} catch (error) {
				this.log(
					`[${this.constructor.name}#findNestedRepos] warning: failed to read .git file at ${result.path}: ${error instanceof Error ? error.message : String(error)}`,
				)
			}
		}
	} catch (error) {
		this.log(
			`[${this.constructor.name}#findNestedRepos] failed to search for worktrees: ${error instanceof Error ? error.message : String(error)}`,
		)
	}

	return Array.from(nestedRepos)
}

private async getShadowGitConfigWorktree(git: SimpleGit) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,8 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
})
})

describe(`${klass.name}#hasNestedGitRepositories`, () => {
it("throws error when nested git repositories are detected during initialization", async () => {
describe(`${klass.name}#nestedGitRepositories`, () => {
it("succeeds when nested git repositories are detected and excludes them from checkpoints", async () => {
// Create a new temporary workspace and service for this test.
const shadowDir = path.join(tmpDir, `${prefix}-nested-git-${Date.now()}`)
const workspaceDir = path.join(tmpDir, `workspace-nested-git-${Date.now()}`)
Expand Down Expand Up @@ -435,13 +435,33 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
}
})

const service = new klass(taskId, shadowDir, workspaceDir, () => {})
const logMessages: string[] = []
const service = new klass(taskId, shadowDir, workspaceDir, (msg: string) => logMessages.push(msg))

// Verify that initialization throws an error when nested git repos are detected
// The error message now includes the specific path of the nested repository
await expect(service.initShadowGit()).rejects.toThrowError(
/Checkpoints are disabled because a nested git repository was detected at:/,
)
// Verify that initialization succeeds even with nested git repos
await expect(service.initShadowGit()).resolves.not.toThrow()
expect(service.isInitialized).toBe(true)

// Modify files in both main workspace and nested repo
await fs.writeFile(mainFile, "Modified content in main repo")
await fs.writeFile(nestedFile, "Modified content in nested repo")

// Save a checkpoint
const checkpoint = await service.saveCheckpoint("Test with nested repos")
expect(checkpoint?.commit).toBeTruthy()

// Verify that only the main file was included in the checkpoint
const diff = await service.getDiff({ to: checkpoint!.commit })
const mainFileChange = diff.find((change) => change.paths.relative === "main-file.txt")
const nestedFileChange = diff.find((change) => change.paths.relative.includes("nested-file.txt"))

expect(mainFileChange).toBeDefined()
expect(mainFileChange?.content.after).toBe("Modified content in main repo")
expect(nestedFileChange).toBeUndefined() // Nested repo changes should be excluded

// Verify that the log includes information about excluding nested repos
const excludeLog = logMessages.find((msg) => msg.includes("excluding") && msg.includes("nested repos"))
expect(excludeLog).toBeDefined()

// Clean up.
vitest.restoreAllMocks()
Expand Down Expand Up @@ -478,6 +498,17 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
await expect(service.initShadowGit()).resolves.not.toThrow()
expect(service.isInitialized).toBe(true)

// Modify the main file and save a checkpoint
await fs.writeFile(mainFile, "Modified content")
const checkpoint = await service.saveCheckpoint("Test without nested repos")
expect(checkpoint?.commit).toBeTruthy()

// Verify the change was included in the checkpoint
const diff = await service.getDiff({ to: checkpoint!.commit })
expect(diff).toHaveLength(1)
expect(diff[0].paths.relative).toBe("main-file.txt")
expect(diff[0].content.after).toBe("Modified content")

// Clean up.
vitest.restoreAllMocks()
await fs.rm(shadowDir, { recursive: true, force: true })
Expand Down