Skip to content

Commit 23dd839

Browse files
h9jianggopherbot
authored andcommitted
gopls/internal/filewatcher: fix race condition on watcher shutdown
The watcher' Close method was subject to a race condition that could cause a panic on a closed internal error channel. The Watcher.Close method waits for all watch-registration goroutines to exit before closing the errs channel. However, due to a race it was possible for another watch-registration task to be added concurrently to the Wait, resulting in a send to a closed channel. This change fixes the race by moving the watchers' Add call into the addWatchHandle method, which is guarded by a mutex. This ensures that the check for a closing watcher and the increment of the waitgroup are atomic. Once the Close method acquires the mutex, no new watch goroutines can be started, guaranteeing that the watchers' Wait method will correctly account for all active goroutines and all the send operations to internal error channel. Fixes golang/go#74805 Change-Id: I741a4a3b02358ba45a8c51a1097ec67b59a71be4 Reviewed-on: https://go-review.googlesource.com/c/tools/+/691935 LUCI-TryBot-Result: Go LUCI <[email protected]> Auto-Submit: Hongxiang Jiang <[email protected]> Reviewed-by: Robert Findley <[email protected]>
1 parent 3a8978c commit 23dd839

File tree

1 file changed

+47
-52
lines changed

1 file changed

+47
-52
lines changed

gopls/internal/filewatcher/filewatcher.go

Lines changed: 47 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,17 @@ type Watcher struct {
3232
// distinct from the fsnotify watcher's error channel.
3333
errs chan error
3434

35-
// watchers counts the number of active watch registration goroutines,
36-
// including their error handling.
37-
watchers sync.WaitGroup
38-
runners sync.WaitGroup // counts the number of active run goroutines (max 1)
35+
runners sync.WaitGroup // counts the number of active run goroutines (max 1)
3936

4037
watcher *fsnotify.Watcher
4138

4239
mu sync.Mutex // guards all fields below
4340

41+
// watchers counts the number of active watch registration goroutines,
42+
// including their error handling.
43+
// After [Watcher.Close] called, watchers's counter will no longer increase.
44+
watchers sync.WaitGroup
45+
4446
// dirCancel maps a directory path to its cancellation channel.
4547
// A nil map indicates the watcher is closing and prevents new directory
4648
// watch registrations.
@@ -157,17 +159,13 @@ func (w *Watcher) WatchDir(path string) error {
157159
return filepath.SkipDir
158160
}
159161

160-
done := w.addWatchHandle(path)
162+
done, release := w.addWatchHandle(path)
161163
if done == nil { // file watcher closing
162164
return filepath.SkipAll
163165
}
166+
defer release()
164167

165-
errChan := make(chan error, 1)
166-
w.watchDir(path, done, errChan)
167-
168-
if err := <-errChan; err != nil {
169-
return err
170-
}
168+
return w.watchDir(path, done)
171169
}
172170
return nil
173171
})
@@ -229,8 +227,13 @@ func (w *Watcher) handleEvent(event fsnotify.Event) *protocol.FileEvent {
229227
// This watch is added asynchronously to prevent a potential
230228
// deadlock on Windows. See fsnotify/fsnotify#502.
231229
// Error encountered will be sent to internal error channel.
232-
if done := w.addWatchHandle(path); done != nil {
233-
go w.watchDir(path, done, w.errs)
230+
if done, release := w.addWatchHandle(path); done != nil {
231+
go func() {
232+
w.errs <- w.watchDir(path, done)
233+
234+
// Only release after the error is sent.
235+
release()
236+
}()
234237
}
235238

236239
return &protocol.FileEvent{
@@ -273,13 +276,10 @@ func (w *Watcher) handleEvent(event fsnotify.Event) *protocol.FileEvent {
273276
}
274277

275278
// watchDir registers a watch for a directory, retrying with backoff if it fails.
276-
// It can be canceled by calling removeWatchHandle. On success or cancellation,
277-
// nil is sent to 'errChan'; otherwise, the last error after all retries is sent.
278-
func (w *Watcher) watchDir(path string, done chan struct{}, errChan chan error) {
279-
if errChan == nil {
280-
panic("input error chan is nil")
281-
}
282-
279+
// It can be canceled by calling removeWatchHandle.
280+
// Returns nil on success or cancellation; otherwise, the last error after all
281+
// retries.
282+
func (w *Watcher) watchDir(path string, done chan struct{}) error {
283283
// On darwin, watching a directory will fail if it contains broken symbolic
284284
// links. This state can occur temporarily during operations like a git
285285
// branch switch. To handle this, we retry multiple times with exponential
@@ -298,50 +298,49 @@ func (w *Watcher) watchDir(path string, done chan struct{}, errChan chan error)
298298
err error
299299
)
300300

301-
// Watchers wait group becomes done only after errChan send.
302-
w.watchers.Add(1)
303-
defer func() {
304-
errChan <- err
305-
w.watchers.Done()
306-
}()
307-
308301
for i := range 5 {
309302
if i > 0 {
310303
select {
311304
case <-time.After(delay):
312305
delay *= 2
313306
case <-done:
314-
return // cancelled
307+
return nil // cancelled
315308
}
316309
}
317310
// This function may block due to fsnotify/fsnotify#502.
318-
err := w.watcher.Add(path)
311+
err = w.watcher.Add(path)
319312
if afterAddHook != nil {
320313
afterAddHook(path, err)
321314
}
322315
if err == nil {
323-
return
316+
break
324317
}
325318
}
319+
320+
return err
326321
}
327322

328323
var afterAddHook func(path string, err error)
329324

330325
// addWatchHandle registers a new directory watch.
331-
// The returned 'done' channel channel should be used to signal cancellation of
332-
// a pending watch.
326+
// The returned 'done' channel should be used to signal cancellation of a
327+
// pending watch, the release function should be called once watch registration
328+
// is done.
333329
// It returns nil if the watcher is already closing.
334-
func (w *Watcher) addWatchHandle(path string) chan struct{} {
330+
func (w *Watcher) addWatchHandle(path string) (done chan struct{}, release func()) {
335331
w.mu.Lock()
336332
defer w.mu.Unlock()
337333

338334
if w.dirCancel == nil { // file watcher is closing.
339-
return nil
335+
return nil, nil
340336
}
341337

342-
done := make(chan struct{})
338+
done = make(chan struct{})
343339
w.dirCancel[path] = done
344-
return done
340+
341+
w.watchers.Add(1)
342+
343+
return done, w.watchers.Done
345344
}
346345

347346
// removeWatchHandle removes the handle for a directory watch and cancels any
@@ -356,19 +355,6 @@ func (w *Watcher) removeWatchHandle(path string) {
356355
}
357356
}
358357

359-
// close removes all handles and cancels all pending watch attempt for that path
360-
// and set dirCancel to nil which prevent any future watch attempts.
361-
func (w *Watcher) close() {
362-
w.mu.Lock()
363-
dirCancel := w.dirCancel
364-
w.dirCancel = nil
365-
w.mu.Unlock()
366-
367-
for _, ch := range dirCancel {
368-
close(ch)
369-
}
370-
}
371-
372358
// isWatchedDir reports whether the given path has a watch handle, meaning it is
373359
// a directory the watcher is managing.
374360
func (w *Watcher) isWatchedDir(path string) bool {
@@ -409,13 +395,22 @@ func (w *Watcher) drainEvents() []protocol.FileEvent {
409395
// Close shuts down the watcher, waits for the internal goroutine to terminate,
410396
// and returns any final error.
411397
func (w *Watcher) Close() error {
398+
// Set dirCancel to nil which prevent any future watch attempts.
399+
w.mu.Lock()
400+
dirCancel := w.dirCancel
401+
w.dirCancel = nil
402+
w.mu.Unlock()
403+
412404
// Cancel any ongoing watch registration.
413-
w.close()
405+
for _, ch := range dirCancel {
406+
close(ch)
407+
}
414408

415409
// Wait for all watch registration goroutines to finish, including their
416410
// error handling. This ensures that:
417-
// - All [Watcher.watchDir] goroutines have exited and sent their errors, so
418-
// it is safe to close the internal error channel.
411+
// - All [Watcher.watchDir] goroutines have exited and it's error is sent
412+
// to the internal error channel. So it is safe to close the internal
413+
// error channel.
419414
// - There are no ongoing [fsnotify.Watcher.Add] calls, so it is safe to
420415
// close the fsnotify watcher (see fsnotify/fsnotify#704).
421416
w.watchers.Wait()

0 commit comments

Comments
 (0)