|
13 | 13 | // or an F_OFD_SETLK command for 'fcntl', that allows for better concurrency and |
14 | 14 | // does not require per-inode bookkeeping in the application. |
15 | 15 | // |
16 | | -// TODO(bcmills): If we add a build tag for Illumos (see golang.org/issue/20603) |
17 | | -// then Illumos should use F_OFD_SETLK, and the resulting code would be as |
18 | | -// simple as filelock_unix.go. We will still need the code in this file for AIX |
19 | | -// or as long as Oracle Solaris provides only F_SETLK. |
| 16 | +// TODO(golang.org/issue/35618): add a syscall.Flock binding for Illumos and |
| 17 | +// switch it over to use filelock_unix.go. |
20 | 18 |
|
21 | 19 | package filelock |
22 | 20 |
|
23 | 21 | import ( |
24 | 22 | "errors" |
25 | 23 | "io" |
| 24 | + "math/rand" |
26 | 25 | "os" |
27 | 26 | "sync" |
28 | 27 | "syscall" |
| 28 | + "time" |
29 | 29 | ) |
30 | 30 |
|
31 | 31 | type lockType int16 |
@@ -91,7 +91,67 @@ func lock(f File, lt lockType) (err error) { |
91 | 91 | wait <- f |
92 | 92 | } |
93 | 93 |
|
94 | | - err = setlkw(f.Fd(), lt) |
| 94 | + // Spurious EDEADLK errors arise on platforms that compute deadlock graphs at |
| 95 | + // the process, rather than thread, level. Consider processes P and Q, with |
| 96 | + // threads P.1, P.2, and Q.3. The following trace is NOT a deadlock, but will be |
| 97 | + // reported as a deadlock on systems that consider only process granularity: |
| 98 | + // |
| 99 | + // P.1 locks file A. |
| 100 | + // Q.3 locks file B. |
| 101 | + // Q.3 blocks on file A. |
| 102 | + // P.2 blocks on file B. (This is erroneously reported as a deadlock.) |
| 103 | + // P.1 unlocks file A. |
| 104 | + // Q.3 unblocks and locks file A. |
| 105 | + // Q.3 unlocks files A and B. |
| 106 | + // P.2 unblocks and locks file B. |
| 107 | + // P.2 unlocks file B. |
| 108 | + // |
| 109 | + // These spurious errors were observed in practice on AIX and Solaris in |
| 110 | + // cmd/go: see https://golang.org/issue/32817. |
| 111 | + // |
| 112 | + // We work around this bug by treating EDEADLK as always spurious. If there |
| 113 | + // really is a lock-ordering bug between the interacting processes, it will |
| 114 | + // become a livelock instead, but that's not appreciably worse than if we had |
| 115 | + // a proper flock implementation (which generally does not even attempt to |
| 116 | + // diagnose deadlocks). |
| 117 | + // |
| 118 | + // In the above example, that changes the trace to: |
| 119 | + // |
| 120 | + // P.1 locks file A. |
| 121 | + // Q.3 locks file B. |
| 122 | + // Q.3 blocks on file A. |
| 123 | + // P.2 spuriously fails to lock file B and goes to sleep. |
| 124 | + // P.1 unlocks file A. |
| 125 | + // Q.3 unblocks and locks file A. |
| 126 | + // Q.3 unlocks files A and B. |
| 127 | + // P.2 wakes up and locks file B. |
| 128 | + // P.2 unlocks file B. |
| 129 | + // |
| 130 | + // We know that the retry loop will not introduce a *spurious* livelock |
| 131 | + // because, according to the POSIX specification, EDEADLK is only to be |
| 132 | + // returned when “the lock is blocked by a lock from another process”. |
| 133 | + // If that process is blocked on some lock that we are holding, then the |
| 134 | + // resulting livelock is due to a real deadlock (and would manifest as such |
| 135 | + // when using, for example, the flock implementation of this package). |
| 136 | + // If the other process is *not* blocked on some other lock that we are |
| 137 | + // holding, then it will eventually release the requested lock. |
| 138 | + |
| 139 | + nextSleep := 1 * time.Millisecond |
| 140 | + const maxSleep = 500 * time.Millisecond |
| 141 | + for { |
| 142 | + err = setlkw(f.Fd(), lt) |
| 143 | + if err != syscall.EDEADLK { |
| 144 | + break |
| 145 | + } |
| 146 | + time.Sleep(nextSleep) |
| 147 | + |
| 148 | + nextSleep += nextSleep |
| 149 | + if nextSleep > maxSleep { |
| 150 | + nextSleep = maxSleep |
| 151 | + } |
| 152 | + // Apply ±5% jitter (a 10% spread) to avoid synchronizing collisions when we finally unblock.
| 153 | + nextSleep += time.Duration((0.1*rand.Float64() - 0.05) * float64(nextSleep)) |
| 154 | + } |
95 | 155 |
|
96 | 156 | if err != nil { |
97 | 157 | unlock(f) |
|
0 commit comments