Skip to content

Commit c77d512

Browse files
cstocktonChris Stockton
andauthored
feat: config reloading with fsnotify, poller fallback, and signals (#2161)
**Summary** This PR introduces additional configuration reloading options. Previously, Auth only supported fsnotify for watching `.env` changes, which fails on some filesystems. This implementation adds a poller fallback mode and signal-triggered reloads. **Config reloader** * Multi-source triggers: * **fsnotify** for `.env` changes where supported * **poller fallback** that diffs directory contents for filesystems without notification support * **POSIX signals** (default `SIGUSR1`) for operator-initiated reloads (e.g. `systemctl reload`) * Debounce via `GracePeriodInterval` to coalesce bursts of changes into a single reload. * Invalid configs are logged but ignored; the last good configuration continues to serve. **Wiring & observability** * Update `serve_cmd` to construct `NewReloader` from `ReloadingConfiguration` and log enabled modes/intervals. * Graceful shutdown handled independently of reloads. * Errors from watchers and pollers logged clearly, fallbacks engaged automatically when possible. **Configuration knobs** (`GOTRUE_RELOADING_*`) * `NotifyEnabled` (default true) * `PollerEnabled` (default true), `PollerInterval` (default 10s) * `SignalEnabled` (default false), `SignalNumber` (default 10 / SIGUSR1) * `GracePeriodInterval` (default 5s) **Tests** * End-to-end reloader tests: fsnotify happy path, poller fallback, signal path, debounce, invalid configs. * Poller unit tests for detection semantics and concurrency guard. --------- Co-authored-by: Chris Stockton <[email protected]>
1 parent a9424d2 commit c77d512

File tree

6 files changed

+1252
-164
lines changed

6 files changed

+1252
-164
lines changed

cmd/serve_cmd.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,19 +103,40 @@ func serve(ctx context.Context) {
103103
go func() {
104104
defer wg.Done()
105105

106+
rc := config.Reloading
107+
le := logrus.WithFields(logrus.Fields{
108+
"component": "reloader",
109+
"notify_enabled": rc.NotifyEnabled,
110+
"poller_enabled": rc.PollerEnabled,
111+
"poller_interval": rc.PollerInterval.String(),
112+
"signal_enabled": rc.SignalEnabled,
113+
"signal_number": rc.SignalNumber,
114+
"grace_period_duration": rc.GracePeriodInterval.String(),
115+
})
116+
le.Info("starting configuration reloader")
117+
118+
var err error
119+
defer func() {
120+
exitFn := le.Info
121+
if err != nil {
122+
exitFn = le.WithError(err).Error
123+
}
124+
exitFn("config reloader is exiting")
125+
}()
126+
106127
fn := func(latestCfg *conf.GlobalConfiguration) {
107-
log.Info("reloading api with new configuration")
128+
le.Info("reloading api with new configuration")
108129

109130
// When config is updated we notify the apiworker.
110131
wrk.ReloadConfig(latestCfg)
111132

112133
// Create a new API version with the updated config.
113134
latestAPI := api.NewAPIWithVersion(
114-
config, db, utilities.Version,
135+
latestCfg, db, utilities.Version,
115136

116137
// Create a new mailer with existing template cache.
117138
api.WithMailer(
118-
templatemailer.FromConfig(config, mrCache),
139+
templatemailer.FromConfig(latestCfg, mrCache),
119140
),
120141

121142
// Persist existing rate limiters.
@@ -128,9 +149,9 @@ func serve(ctx context.Context) {
128149
ah.Store(latestAPI)
129150
}
130151

131-
rl := reloader.NewReloader(watchDir)
132-
if err := rl.Watch(ctx, fn); err != nil {
133-
log.WithError(err).Error("watcher is exiting")
152+
rl := reloader.NewReloader(rc, watchDir)
153+
if err = rl.Watch(ctx, fn); err != nil {
154+
log.WithError(err).Error("config reloader is exiting")
134155
}
135156
}()
136157
}

internal/conf/configuration.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,39 @@ type ExperimentalConfiguration struct {
266266
ProvidersWithOwnLinkingDomain []string `split_words:"true"`
267267
}
268268

269+
// ReloadingConfiguration holds the configuration values for runtime
// configuration reloads. These are startup configuration values, meaning
// they do not react to live config reloads.
//
// IMPORTANT:
//   - You must provide the --config-dir flag for these settings to take effect.
//   - These config values are for startup, they remain static through reloads.
type ReloadingConfiguration struct {

	// If notify reloading is enabled the auth server will attempt to use the
	// filesystem's notification support to watch for config updates.
	NotifyEnabled bool `json:"notify_enabled" split_words:"true" default:"true"`

	// When notify reloading fails, fall back to filesystem polling if this
	// setting is enabled.
	//
	// split_words is "true" to match every sibling field, so the key for this
	// setting is derived the same way as all the others.
	PollerEnabled bool `json:"poller_enabled" split_words:"true" default:"false"`

	// This determines how often to poll the filesystem when notify is disabled.
	PollerInterval time.Duration `json:"poller_interval" split_words:"true" default:"10s"`

	// If signal reloading is enabled the auth server will listen for the
	// given SignalNumber and reload the config when received. This may be
	// used to configure `systemd reload` support, by default the SIGUSR1 linux
	// signal number of 10 is used.
	SignalEnabled bool `json:"signal_enabled" split_words:"true" default:"false"`
	SignalNumber  int  `json:"signal_number" split_words:"true" default:"10"`

	// When at least one reloader is enabled this setting determines how much
	// idle time must pass before triggering a reload. This ensures a single
	// auth server config reload operation during a burst of config updates.
	GracePeriodInterval time.Duration `json:"grace_period_interval" split_words:"true" default:"5s"`
}
301+
269302
// GlobalConfiguration holds all the configuration that applies to all instances.
270303
type GlobalConfiguration struct {
271304
API APIConfiguration
@@ -307,6 +340,7 @@ type GlobalConfiguration struct {
307340
CORS CORSConfiguration `json:"cors"`
308341

309342
Experimental ExperimentalConfiguration `json:"experimental"`
343+
Reloading ReloadingConfiguration `json:"reloading"`
310344
}
311345

312346
type CORSConfiguration struct {

internal/reloader/poller.go

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
package reloader
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"io"
8+
"io/fs"
9+
"maps"
10+
"os"
11+
"strings"
12+
"sync"
13+
"time"
14+
)
15+
16+
const (
	// pollerMaxScan is the number of directory entries requested from each
	// ReadDir call while scanning the watched directory.
	pollerMaxScan = 1

	// pollerMaxFiles bounds a scan: the directory is read at most
	// pollerMaxFiles / pollerMaxScan times before the scan is abandoned.
	pollerMaxFiles = 1000
)
20+
21+
type poller struct {
22+
pollMu sync.Mutex
23+
dir string
24+
cur, prev *pollerState
25+
}
26+
27+
// pollerFile is the comparable record kept for one scanned file. Two records
// are compared with ==, so every field takes part in change detection.
type pollerFile struct {
	name string      // base name as reported by fs.FileInfo.Name
	size int64       // length as reported by fs.FileInfo.Size
	mode fs.FileMode // mode bits as reported by fs.FileInfo.Mode
	mod  time.Time   // modification time as reported by fs.FileInfo.ModTime
	dir  bool        // result of fs.FileInfo.IsDir
}
34+
35+
type pollerState struct {
36+
updatedAt time.Time
37+
files map[string]*pollerFile
38+
}
39+
40+
// reset empties the files map in place so the snapshot can be refilled.
// updatedAt is left untouched.
func (o *pollerState) reset() {
	clear(o.files)
}
41+
42+
func newPollerState() *pollerState {
43+
return &pollerState{
44+
files: make(map[string]*pollerFile),
45+
}
46+
}
47+
48+
func newPollerFile(fi fs.FileInfo) *pollerFile {
49+
return &pollerFile{
50+
name: fi.Name(),
51+
size: fi.Size(),
52+
mode: fi.Mode(),
53+
mod: fi.ModTime(),
54+
dir: fi.IsDir(),
55+
}
56+
}
57+
58+
func newPoller(watchDir string) *poller {
59+
return &poller{
60+
dir: watchDir,
61+
cur: newPollerState(),
62+
prev: newPollerState(),
63+
}
64+
}
65+
66+
func (o *poller) watch(
67+
ctx context.Context,
68+
ival time.Duration,
69+
notifyFn func(),
70+
errFn func(error),
71+
) error {
72+
tr := time.NewTicker(ival)
73+
defer tr.Stop()
74+
75+
for {
76+
select {
77+
case <-ctx.Done():
78+
return ctx.Err()
79+
case <-tr.C:
80+
changed, err := o.poll(ctx)
81+
if err != nil {
82+
errFn(err)
83+
continue
84+
}
85+
if changed {
86+
notifyFn()
87+
}
88+
}
89+
}
90+
}
91+
92+
func (o *poller) poll(ctx context.Context) (bool, error) {
93+
if ok := o.pollMu.TryLock(); !ok {
94+
const msg = "reloader: poller: concurrent calls to poll are invalid"
95+
return false, errors.New(msg)
96+
}
97+
defer o.pollMu.Unlock()
98+
99+
if err := ctx.Err(); err != nil {
100+
return false, err
101+
}
102+
103+
o.prev, o.cur = o.cur, o.prev
104+
if err := o.scan(ctx, o.cur); err != nil {
105+
return false, err
106+
}
107+
o.cur.updatedAt = time.Now()
108+
m1, m2 := o.prev.files, o.cur.files
109+
110+
if o.prev.updatedAt.IsZero() {
111+
return false, nil
112+
}
113+
114+
eq := maps.EqualFunc(m1, m2, func(v1, v2 *pollerFile) bool {
115+
return *v1 == *v2
116+
})
117+
return !eq, nil
118+
}
119+
120+
func (o *poller) scan(
121+
ctx context.Context,
122+
ps *pollerState,
123+
) error {
124+
o.cur.reset()
125+
126+
f, err := os.Open(o.dir)
127+
if err != nil {
128+
return err
129+
}
130+
defer f.Close()
131+
132+
return o.scanFile(ctx, ps, f)
133+
}
134+
135+
func (o *poller) scanFile(
136+
ctx context.Context,
137+
ps *pollerState,
138+
f fs.ReadDirFile,
139+
) error {
140+
fi, err := f.Stat()
141+
if err != nil {
142+
return fmt.Errorf("poller: %w", err)
143+
}
144+
if !fi.IsDir() {
145+
return fmt.Errorf("poller: %q is not a directory", o.dir)
146+
}
147+
148+
for range pollerMaxFiles / pollerMaxScan {
149+
if err := ctx.Err(); err != nil {
150+
return err
151+
}
152+
153+
ents, err := f.ReadDir(pollerMaxScan)
154+
if err == io.EOF {
155+
return nil
156+
}
157+
if err != nil {
158+
return fmt.Errorf("poller: error reading dir %q: %w", o.dir, err)
159+
}
160+
o.scanEntries(ps, ents)
161+
}
162+
return fmt.Errorf("poller: %q has too many files", o.dir)
163+
}
164+
165+
func (o *poller) scanEntries(ps *pollerState, ents []fs.DirEntry) {
166+
for _, ent := range ents {
167+
fi, err := ent.Info()
168+
if err != nil {
169+
continue
170+
}
171+
if fi.IsDir() {
172+
continue
173+
}
174+
if !strings.HasSuffix(ent.Name(), ".env") {
175+
continue
176+
}
177+
178+
pf := newPollerFile(fi)
179+
ps.files[pf.name] = pf
180+
}
181+
}

0 commit comments

Comments
 (0)