Skip to content

Commit 9fd81b4

Browse files
authored
config: revert to watch config file and fix the concurrency issue (#446)
1 parent c0f4adc commit 9fd81b4

File tree

5 files changed

+82
-29
lines changed

5 files changed

+82
-29
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/BurntSushi/toml v1.2.1
77
github.com/bahlo/generic-list-go v0.2.0
88
github.com/cenkalti/backoff/v4 v4.2.1
9+
github.com/fsnotify/fsnotify v1.6.0
910
github.com/gin-contrib/pprof v1.4.0
1011
github.com/gin-gonic/gin v1.8.1
1112
github.com/go-mysql-org/go-mysql v1.6.0

go.sum

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga
214214
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA=
215215
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
216216
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
217+
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
218+
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
217219
github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc=
218220
github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ=
219221
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
@@ -1039,6 +1041,7 @@ golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBc
10391041
golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10401042
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10411043
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1044+
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10421045
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10431046
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10441047
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

pkg/manager/config/config.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import (
77
"bytes"
88
"hash/crc32"
99
"os"
10+
"time"
1011

1112
"github.com/BurntSushi/toml"
13+
"github.com/fsnotify/fsnotify"
1214
"github.com/pingcap/tiproxy/lib/config"
1315
"github.com/pingcap/tiproxy/lib/util/errors"
1416
"go.uber.org/zap"
@@ -23,6 +25,33 @@ func (e *ConfigManager) reloadConfigFile(file string) error {
2325
return e.SetTOMLConfig(proxyConfigData)
2426
}
2527

28+
func (e *ConfigManager) handleFSEvent(ev fsnotify.Event, f string) {
29+
switch {
30+
case ev.Has(fsnotify.Create), ev.Has(fsnotify.Write), ev.Has(fsnotify.Remove), ev.Has(fsnotify.Rename):
31+
// The file may be the log file, triggering reload will cause more logs and thus cause reload again,
32+
// so we need to filter the wrong files.
33+
// The filesystem differs from OS to OS, so don't use string comparison.
34+
f1, err := os.Stat(ev.Name)
35+
if err != nil {
36+
break
37+
}
38+
f2, err := os.Stat(f)
39+
if err != nil {
40+
break
41+
}
42+
if !os.SameFile(f1, f2) {
43+
break
44+
}
45+
if ev.Has(fsnotify.Remove) || ev.Has(fsnotify.Rename) {
46+
// in case of remove/rename the file, files are not present at filesystem for a while
47+
// it may be too fast to read the config file now, sleep for a while
48+
time.Sleep(50 * time.Millisecond)
49+
}
50+
// try to reload it
51+
e.logger.Info("config file reloaded", zap.Stringer("event", ev), zap.Error(e.reloadConfigFile(f)))
52+
}
53+
}
54+
2655
// SetTOMLConfig will do partial config update. Usually, user will expect config changes
2756
// only when they specified a config item. It is, however, impossible to tell a struct
2857
// `c.max-conns == 0` means no user-input, or it specified `0`.

pkg/manager/config/config_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ func TestConfigRemove(t *testing.T) {
142142
require.NoError(t, os.Remove(tmpcfg))
143143
require.NoError(t, os.WriteFile(tmpcfg, []byte(`proxy.addr = "gg"`), 0644))
144144

145-
// check that reload still works
145+
// check that re-watch still works
146146
require.Eventually(t, func() bool { return cfgmgr.GetConfig().Proxy.Addr == "gg" }, 3*time.Second, 100*time.Millisecond)
147147

148148
// remove again but with a long sleep
@@ -277,7 +277,7 @@ func TestFilePath(t *testing.T) {
277277

278278
count = 0
279279
cfgmgr, text, _ = testConfigManager(t, test.filename)
280-
checkLog(true)
280+
checkLog(false)
281281

282282
// Test write.
283283
require.NoError(t, os.WriteFile(test.filename, []byte("proxy.pd-addrs = \"127.0.0.1:2379\""), 0644))

pkg/manager/config/manager.go

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@ package config
55

66
import (
77
"context"
8-
"os"
8+
"path/filepath"
99
"sync"
1010
"time"
1111

12+
"github.com/fsnotify/fsnotify"
1213
"github.com/pingcap/tiproxy/lib/config"
1314
"github.com/pingcap/tiproxy/lib/util/errors"
1415
"github.com/pingcap/tiproxy/lib/util/waitgroup"
@@ -42,7 +43,7 @@ type ConfigManager struct {
4243

4344
kv *btree.BTreeG[KVValue]
4445

45-
lastModTime time.Time
46+
wch *fsnotify.Watcher
4647
checkFileInterval time.Duration
4748
overlay []byte
4849
sts struct {
@@ -80,22 +81,53 @@ func (e *ConfigManager) Init(ctx context.Context, logger *zap.Logger, configFile
8081
}
8182

8283
if configFile != "" {
83-
if err := e.checkFileAndLoad(configFile); err != nil {
84+
e.wch, err = fsnotify.NewWatcher()
85+
if err != nil {
86+
return errors.WithStack(err)
87+
}
88+
89+
// Watch the parent dir, because vim/k8s or other apps may not edit files in-place:
90+
// e.g. k8s configmap is a symlink of a symlink to a file, which will only trigger
91+
// a remove event for the file.
92+
parentDir := filepath.Dir(configFile)
93+
94+
if err := e.reloadConfigFile(configFile); err != nil {
8495
return err
8596
}
97+
if err := e.wch.Add(parentDir); err != nil {
98+
return errors.WithStack(err)
99+
}
100+
86101
e.wg.Run(func() {
87-
var lastErr error
102+
// Some apps will trigger rename/remove events, which means they will re-create/rename
103+
// the new file to the directory. Watch possibly stopped after rename/remove events.
104+
// So, we use a tick to repeatedly add the parent dir to re-watch files.
88105
ticker := time.NewTicker(e.checkFileInterval)
106+
var watchErr error
89107
for {
90108
select {
91109
case <-nctx.Done():
92110
return
111+
case err := <-e.wch.Errors:
112+
e.logger.Warn("failed to watch config file", zap.Error(err))
113+
watchErr = err
114+
case ev := <-e.wch.Events:
115+
e.handleFSEvent(ev, configFile)
93116
case <-ticker.C:
94-
// Do not report the same error to avoid log flooding.
95-
if err = e.checkFileAndLoad(configFile); err != nil && errors.Is(err, lastErr) {
96-
e.logger.Warn("reload config file failed", zap.Error(err))
117+
// There may be a concurrency issue:
118+
// 1. Remove the directory and the watcher removes the directory automatically
119+
// 2. Create the directory and the file again within a tick
120+
// 3. Add it to the watcher again, but the CREATE event is not sent and the file is not loaded
121+
// So if watch failed and succeeds now, reload the file.
122+
if err := e.wch.Add(parentDir); err != nil {
123+
e.logger.Warn("failed to rewatch config file", zap.Error(err))
124+
watchErr = err
125+
continue
126+
}
127+
if watchErr != nil {
128+
watchErr = e.reloadConfigFile(configFile)
129+
e.logger.Info("config file reloaded", zap.Error(watchErr))
97130
}
98-
lastErr = err
99131
}
100132
}
101133
})
@@ -108,25 +140,8 @@ func (e *ConfigManager) Init(ctx context.Context, logger *zap.Logger, configFile
108140
return nil
109141
}
110142

111-
func (e *ConfigManager) checkFileAndLoad(filename string) error {
112-
info, err := os.Stat(filename)
113-
if err != nil {
114-
return errors.WithStack(err)
115-
}
116-
if info.IsDir() {
117-
return errors.New("config file is a directory")
118-
}
119-
if info.ModTime() != e.lastModTime {
120-
if err = e.reloadConfigFile(filename); err != nil {
121-
return err
122-
}
123-
e.logger.Info("config file reloaded", zap.Time("file_modify_time", info.ModTime()))
124-
e.lastModTime = info.ModTime()
125-
}
126-
return nil
127-
}
128-
129143
func (e *ConfigManager) Close() error {
144+
var wcherr error
130145
if e.cancel != nil {
131146
e.cancel()
132147
e.cancel = nil
@@ -138,5 +153,10 @@ func (e *ConfigManager) Close() error {
138153
e.sts.listeners = nil
139154
e.sts.Unlock()
140155
e.wg.Wait()
141-
return nil
156+
// close after all goroutines are done
157+
if e.wch != nil {
158+
wcherr = e.wch.Close()
159+
e.wch = nil
160+
}
161+
return wcherr
142162
}

0 commit comments

Comments
 (0)