Skip to content

Commit c26bc77

Browse files
committed
lockfile: add PID file for debugging stale locks
When a lock file is held, it can be helpful to know which process owns it, especially when debugging stale locks left behind by crashed processes. Add an optional feature that creates a companion PID file alongside each lock file, containing the PID of the lock holder. For a lock file "foo.lock", the PID file is named "foo~pid.lock". The tilde character is forbidden in refnames and allowed in Windows filenames, which guarantees no collision with the refs namespace (e.g., refs "foo" and "foo~pid" cannot both exist). The file contains a single line in the format "pid <value>" followed by a newline. The PID file is created when a lock is acquired (if enabled), and automatically cleaned up when the lock is released (via commit or rollback). The file is registered as a tempfile so it gets cleaned up by signal and atexit handlers if the process terminates abnormally. When a lock conflict occurs, the code checks for an existing PID file and, if found, uses kill(pid, 0) to determine if the process is still running. This allows providing context-aware error messages: Lock is held by process 12345. Wait for it to finish, or remove the lock file to continue. Or for a stale lock: Lock was held by process 12345, which is no longer running. Remove the stale lock file to continue. The feature is controlled via core.lockfilePid configuration (boolean). Defaults to false. When enabled, PID files are created for all lock operations. Existing PID files are always read when displaying lock errors, regardless of the core.lockfilePid setting. This ensures helpful diagnostics even when the feature was previously enabled and later disabled. Signed-off-by: Paulo Casaretto <[email protected]>
1 parent 66ce5f8 commit c26bc77

File tree

9 files changed

+320
-35
lines changed

9 files changed

+320
-35
lines changed

Documentation/config/core.adoc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,17 @@ confusion unless you know what you are doing (e.g. you are creating a
348348
read-only snapshot of the same index to a location different from the
349349
repository's usual working tree).
350350

351+
core.lockfilePid::
352+
If true, Git will create a PID file alongside lock files. When a
353+
lock acquisition fails and a PID file exists, Git can provide
354+
additional diagnostic information about the process holding the
355+
lock, including whether it is still running. Defaults to `false`.
356+
+
357+
The PID file is named by inserting `~pid` before the `.lock` suffix.
358+
For example, if the lock file is `index.lock`, the PID file will be
359+
`index~pid.lock`. The file contains a single line in the format
360+
`pid <value>` followed by a newline.
361+
351362
core.logAllRefUpdates::
352363
Enable the reflog. Updates to a ref <ref> is logged to the file
353364
"`$GIT_DIR/logs/<ref>`", by appending the new and old

builtin/commit.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,8 +539,7 @@ static const char *prepare_index(const char **argv, const char *prefix,
539539

540540
path = repo_git_path(the_repository, "next-index-%"PRIuMAX,
541541
(uintmax_t) getpid());
542-
hold_lock_file_for_update(&false_lock, path,
543-
LOCK_DIE_ON_ERROR);
542+
hold_lock_file_for_update(&false_lock, path, LOCK_DIE_ON_ERROR);
544543

545544
create_base_index(current_head);
546545
add_remove_files(&partial);

builtin/gc.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -748,8 +748,7 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
748748
xsnprintf(my_host, sizeof(my_host), "unknown");
749749

750750
pidfile_path = repo_git_path(the_repository, "gc.pid");
751-
fd = hold_lock_file_for_update(&lock, pidfile_path,
752-
LOCK_DIE_ON_ERROR);
751+
fd = hold_lock_file_for_update(&lock, pidfile_path, LOCK_DIE_ON_ERROR);
753752
if (!force) {
754753
static char locking_host[HOST_NAME_MAX + 1];
755754
static char *scan_fmt;
@@ -1016,8 +1015,7 @@ int cmd_gc(int argc,
10161015

10171016
if (daemonized) {
10181017
char *path = repo_git_path(the_repository, "gc.log");
1019-
hold_lock_file_for_update(&log_lock, path,
1020-
LOCK_DIE_ON_ERROR);
1018+
hold_lock_file_for_update(&log_lock, path, LOCK_DIE_ON_ERROR);
10211019
dup2(get_lock_file_fd(&log_lock), 2);
10221020
atexit(process_log_file_at_exit);
10231021
free(path);

compat/mingw.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,16 @@ int mingw_kill(pid_t pid, int sig)
19721972
CloseHandle(h);
19731973
return 0;
19741974
}
1975+
/*
1976+
* OpenProcess returns ERROR_INVALID_PARAMETER for
1977+
* non-existent PIDs. Map this to ESRCH for POSIX
1978+
* compatibility with kill(pid, 0).
1979+
*/
1980+
if (GetLastError() == ERROR_INVALID_PARAMETER)
1981+
errno = ESRCH;
1982+
else
1983+
errno = err_win_to_posix(GetLastError());
1984+
return -1;
19751985
}
19761986

19771987
errno = EINVAL;

environment.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "gettext.h"
2222
#include "git-zlib.h"
2323
#include "ident.h"
24+
#include "lockfile.h"
2425
#include "mailmap.h"
2526
#include "object-name.h"
2627
#include "repository.h"
@@ -532,6 +533,11 @@ static int git_default_core_config(const char *var, const char *value,
532533
return 0;
533534
}
534535

536+
if (!strcmp(var, "core.lockfilepid")) {
537+
lockfile_pid_enabled = git_config_bool(var, value);
538+
return 0;
539+
}
540+
535541
if (!strcmp(var, "core.createobject")) {
536542
if (!value)
537543
return config_error_nonbool(var);

lockfile.c

Lines changed: 156 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
#include "abspath.h"
77
#include "gettext.h"
88
#include "lockfile.h"
9+
#include "parse.h"
10+
#include "strbuf.h"
11+
#include "wrapper.h"
912

1013
/*
1114
* path = absolute or relative path name
@@ -71,19 +74,119 @@ static void resolve_symlink(struct strbuf *path)
7174
strbuf_reset(&link);
7275
}
7376

77+
/*
78+
* Lock PID file functions - write PID to a foo~pid.lock file alongside
79+
* the lock file for debugging stale locks. The PID file is registered
80+
* as a tempfile so it gets cleaned up by signal/atexit handlers.
81+
*
82+
* Naming: For "foo.lock", the PID file is "foo~pid.lock". The tilde is
83+
* forbidden in refnames and allowed in Windows filenames, guaranteeing
84+
* no collision with the refs namespace.
85+
*/
86+
87+
/* Global config variable, initialized from core.lockfilePid */
88+
int lockfile_pid_enabled;
89+
90+
/*
91+
* Path generation helpers.
92+
* Given base path "foo", generate:
93+
* - lock path: "foo.lock"
94+
* - pid path: "foo-pid.lock"
95+
*/
96+
static void get_lock_path(struct strbuf *out, const char *path)
97+
{
98+
strbuf_addstr(out, path);
99+
strbuf_addstr(out, LOCK_SUFFIX);
100+
}
101+
102+
static void get_pid_path(struct strbuf *out, const char *path)
103+
{
104+
strbuf_addstr(out, path);
105+
strbuf_addstr(out, LOCK_PID_INFIX);
106+
strbuf_addstr(out, LOCK_SUFFIX);
107+
}
108+
109+
static struct tempfile *create_lock_pid_file(const char *pid_path, int mode)
110+
{
111+
struct strbuf content = STRBUF_INIT;
112+
struct tempfile *pid_tempfile = NULL;
113+
int fd = -1;
114+
115+
if (!lockfile_pid_enabled)
116+
goto out;
117+
118+
fd = open(pid_path, O_WRONLY | O_CREAT | O_EXCL, mode);
119+
if (fd < 0)
120+
goto out;
121+
122+
strbuf_addf(&content, "pid %" PRIuMAX "\n", (uintmax_t)getpid());
123+
if (write_in_full(fd, content.buf, content.len) < 0) {
124+
warning_errno(_("could not write lock pid file '%s'"), pid_path);
125+
close(fd);
126+
fd = -1;
127+
unlink(pid_path);
128+
goto out;
129+
}
130+
131+
close(fd);
132+
fd = -1;
133+
pid_tempfile = register_tempfile(pid_path);
134+
135+
out:
136+
if (fd >= 0)
137+
close(fd);
138+
strbuf_release(&content);
139+
return pid_tempfile;
140+
}
141+
142+
static int read_lock_pid(const char *pid_path, uintmax_t *pid_out)
143+
{
144+
struct strbuf content = STRBUF_INIT;
145+
const char *val;
146+
int ret = -1;
147+
148+
if (strbuf_read_file(&content, pid_path, LOCK_PID_MAXLEN) <= 0)
149+
goto out;
150+
151+
strbuf_rtrim(&content);
152+
153+
if (skip_prefix(content.buf, "pid ", &val)) {
154+
char *endptr;
155+
*pid_out = strtoumax(val, &endptr, 10);
156+
if (*pid_out > 0 && !*endptr)
157+
ret = 0;
158+
}
159+
160+
if (ret)
161+
warning(_("malformed lock pid file '%s'"), pid_path);
162+
163+
out:
164+
strbuf_release(&content);
165+
return ret;
166+
}
167+
74168
/* Make sure errno contains a meaningful value on error */
75169
static int lock_file(struct lock_file *lk, const char *path, int flags,
76170
int mode)
77171
{
78-
struct strbuf filename = STRBUF_INIT;
172+
struct strbuf base_path = STRBUF_INIT;
173+
struct strbuf lock_path = STRBUF_INIT;
174+
struct strbuf pid_path = STRBUF_INIT;
79175

80-
strbuf_addstr(&filename, path);
176+
strbuf_addstr(&base_path, path);
81177
if (!(flags & LOCK_NO_DEREF))
82-
resolve_symlink(&filename);
178+
resolve_symlink(&base_path);
179+
180+
get_lock_path(&lock_path, base_path.buf);
181+
get_pid_path(&pid_path, base_path.buf);
182+
183+
lk->tempfile = create_tempfile_mode(lock_path.buf, mode);
184+
if (lk->tempfile)
185+
lk->pid_tempfile = create_lock_pid_file(pid_path.buf, mode);
83186

84-
strbuf_addstr(&filename, LOCK_SUFFIX);
85-
lk->tempfile = create_tempfile_mode(filename.buf, mode);
86-
strbuf_release(&filename);
187+
strbuf_release(&base_path);
188+
strbuf_release(&lock_path);
189+
strbuf_release(&pid_path);
87190
return lk->tempfile ? lk->tempfile->fd : -1;
88191
}
89192

@@ -151,16 +254,47 @@ static int lock_file_timeout(struct lock_file *lk, const char *path,
151254
void unable_to_lock_message(const char *path, int err, struct strbuf *buf)
152255
{
153256
if (err == EEXIST) {
154-
strbuf_addf(buf, _("Unable to create '%s.lock': %s.\n\n"
155-
"Another git process seems to be running in this repository, e.g.\n"
156-
"an editor opened by 'git commit'. Please make sure all processes\n"
157-
"are terminated then try again. If it still fails, a git process\n"
158-
"may have crashed in this repository earlier:\n"
159-
"remove the file manually to continue."),
160-
absolute_path(path), strerror(err));
161-
} else
257+
const char *abs_path = absolute_path(path);
258+
struct strbuf lock_path = STRBUF_INIT;
259+
struct strbuf pid_path = STRBUF_INIT;
260+
uintmax_t pid;
261+
int pid_status = 0; /* 0 = unknown, 1 = running, -1 = stale */
262+
263+
get_lock_path(&lock_path, abs_path);
264+
get_pid_path(&pid_path, abs_path);
265+
266+
strbuf_addf(buf, _("Unable to create '%s': %s.\n\n"),
267+
lock_path.buf, strerror(err));
268+
269+
/*
270+
* Try to read PID file unconditionally - it may exist if
271+
* core.lockfilePid was enabled.
272+
*/
273+
if (!read_lock_pid(pid_path.buf, &pid)) {
274+
if (kill((pid_t)pid, 0) == 0 || errno == EPERM)
275+
pid_status = 1; /* running (or no permission to signal) */
276+
else if (errno == ESRCH)
277+
pid_status = -1; /* no such process - stale lock */
278+
}
279+
280+
if (pid_status == 1)
281+
strbuf_addf(buf, _("Lock is held by process %" PRIuMAX ". "
282+
"Wait for it to finish, or remove the lock file to continue"),
283+
pid);
284+
else if (pid_status == -1)
285+
strbuf_addf(buf, _("Lock was held by process %" PRIuMAX ", "
286+
"which is no longer running. Remove the stale lock file to continue"),
287+
pid);
288+
else
289+
strbuf_addstr(buf, _("Another git process seems to be running in this repository. "
290+
"Wait for it to finish, or remove the lock file to continue"));
291+
292+
strbuf_release(&lock_path);
293+
strbuf_release(&pid_path);
294+
} else {
162295
strbuf_addf(buf, _("Unable to create '%s.lock': %s"),
163296
absolute_path(path), strerror(err));
297+
}
164298
}
165299

166300
NORETURN void unable_to_lock_die(const char *path, int err)
@@ -207,6 +341,8 @@ int commit_lock_file(struct lock_file *lk)
207341
{
208342
char *result_path = get_locked_file_path(lk);
209343

344+
delete_tempfile(&lk->pid_tempfile);
345+
210346
if (commit_lock_file_to(lk, result_path)) {
211347
int save_errno = errno;
212348
free(result_path);
@@ -216,3 +352,9 @@ int commit_lock_file(struct lock_file *lk)
216352
free(result_path);
217353
return 0;
218354
}
355+
356+
int rollback_lock_file(struct lock_file *lk)
357+
{
358+
delete_tempfile(&lk->pid_tempfile);
359+
return delete_tempfile(&lk->tempfile);
360+
}

lockfile.h

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119

120120
struct lock_file {
121121
struct tempfile *tempfile;
122+
struct tempfile *pid_tempfile;
122123
};
123124

124125
#define LOCK_INIT { 0 }
@@ -127,6 +128,22 @@ struct lock_file {
127128
#define LOCK_SUFFIX ".lock"
128129
#define LOCK_SUFFIX_LEN 5
129130

131+
/*
132+
* PID file naming: for a lock file "foo.lock", the PID file is "foo~pid.lock".
133+
* The tilde is forbidden in refnames and allowed in Windows filenames, avoiding
134+
* namespace collisions (e.g., refs "foo" and "foo~pid" cannot both exist).
135+
*/
136+
#define LOCK_PID_INFIX "~pid"
137+
#define LOCK_PID_INFIX_LEN 4
138+
139+
/* Maximum length for PID file content */
140+
#define LOCK_PID_MAXLEN 32
141+
142+
/*
143+
* Whether to create PID files alongside lock files.
144+
* Configured via core.lockfilePid (boolean).
145+
*/
146+
extern int lockfile_pid_enabled;
130147

131148
/*
132149
* Flags
@@ -169,12 +186,12 @@ struct lock_file {
169186
* handling, and mode are described above.
170187
*/
171188
int hold_lock_file_for_update_timeout_mode(
172-
struct lock_file *lk, const char *path,
173-
int flags, long timeout_ms, int mode);
189+
struct lock_file *lk, const char *path,
190+
int flags, long timeout_ms, int mode);
174191

175192
static inline int hold_lock_file_for_update_timeout(
176-
struct lock_file *lk, const char *path,
177-
int flags, long timeout_ms)
193+
struct lock_file *lk, const char *path,
194+
int flags, long timeout_ms)
178195
{
179196
return hold_lock_file_for_update_timeout_mode(lk, path, flags,
180197
timeout_ms, 0666);
@@ -186,15 +203,14 @@ static inline int hold_lock_file_for_update_timeout(
186203
* argument and error handling are described above.
187204
*/
188205
static inline int hold_lock_file_for_update(
189-
struct lock_file *lk, const char *path,
190-
int flags)
206+
struct lock_file *lk, const char *path, int flags)
191207
{
192208
return hold_lock_file_for_update_timeout(lk, path, flags, 0);
193209
}
194210

195211
static inline int hold_lock_file_for_update_mode(
196-
struct lock_file *lk, const char *path,
197-
int flags, int mode)
212+
struct lock_file *lk, const char *path,
213+
int flags, int mode)
198214
{
199215
return hold_lock_file_for_update_timeout_mode(lk, path, flags, 0, mode);
200216
}
@@ -319,13 +335,10 @@ static inline int commit_lock_file_to(struct lock_file *lk, const char *path)
319335

320336
/*
321337
* Roll back `lk`: close the file descriptor and/or file pointer and
322-
* remove the lockfile. It is a NOOP to call `rollback_lock_file()`
323-
* for a `lock_file` object that has already been committed or rolled
324-
* back. No error will be returned in this case.
338+
* remove the lockfile and any associated PID file. It is a NOOP to
339+
* call `rollback_lock_file()` for a `lock_file` object that has already
340+
* been committed or rolled back. No error will be returned in this case.
325341
*/
326-
static inline int rollback_lock_file(struct lock_file *lk)
327-
{
328-
return delete_tempfile(&lk->tempfile);
329-
}
342+
int rollback_lock_file(struct lock_file *lk);
330343

331344
#endif /* LOCKFILE_H */

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ integration_tests = [
9898
't0028-working-tree-encoding.sh',
9999
't0029-core-unsetenvvars.sh',
100100
't0030-stripspace.sh',
101+
't0031-lockfile-pid.sh',
101102
't0033-safe-directory.sh',
102103
't0034-root-safe-directory.sh',
103104
't0035-safe-bare-repository.sh',

0 commit comments

Comments
 (0)