Skip to content

Commit a72f5d9

Browse files
authored
feat: add advisor to notify you when to double the max connection pool (#2167)
Adds a simple advisor that emits a log message every hour, if the number of [DBStats](https://pkg.go.dev/database/sql#DBStats) samples exceeds: - 1/3 or more samples indicate more than 1ms wait for every millisecond in the sampling interval - 1/3 or more samples indicate 2 or more goroutines have been waiting to acquire a connection from the pool For instance, if you set `GOTRUE_DB_ADVISOR_SAMPLING_INTERVAL="100ms"` and `GOTRUE_DB_ADVISOR_OBSERVATION_INTERVAL="1s"` then: - There are 1000 / 100 = 10 samples - Every 100ms a sample is taken and the conditions are checked - Log message is emitted if either one of the conditions are met at most once per hour
1 parent 68cb8d2 commit a72f5d9

File tree

4 files changed

+222
-2
lines changed

4 files changed

+222
-2
lines changed

internal/conf/configuration.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ type EmailProviderConfiguration struct {
9393
MagicLinkEnabled bool `json:"magic_link_enabled" default:"true" split_words:"true"`
9494
}
9595

96+
type DBAdvisorConfiguration struct {
97+
Enabled bool `json:"enabled" default:"true"`
98+
SamplingInterval time.Duration `json:"sampling_interval" split_words:"true" default:"200ms"`
99+
ObservationInterval time.Duration `json:"observation_interval" split_words:"true" default:"20s"`
100+
}
101+
96102
// DBConfiguration holds all the database related configuration.
97103
type DBConfiguration struct {
98104
Driver string `json:"driver" required:"true"`
@@ -106,6 +112,8 @@ type DBConfiguration struct {
106112
HealthCheckPeriod time.Duration `json:"health_check_period" split_words:"true"`
107113
MigrationsPath string `json:"migrations_path" split_words:"true" default:"./migrations"`
108114
CleanupEnabled bool `json:"cleanup_enabled" split_words:"true" default:"false"`
115+
116+
Advisor DBAdvisorConfiguration `json:"advisor"`
109117
}
110118

111119
func (c *DBConfiguration) Validate() error {

internal/storage/advisor.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package storage
2+
3+
import (
4+
"database/sql"
5+
"math"
6+
"time"
7+
)
8+
9+
type Advisory struct {
10+
LongWaitDurationSamples int
11+
Over2WaitingSamples int
12+
}
13+
14+
type Advisor struct {
15+
StatsFunc func() sql.DBStats
16+
AdviseFunc func(Advisory)
17+
Interval time.Duration
18+
19+
Stats sql.DBStats
20+
LastAdvisedAt time.Time
21+
22+
Iterations int
23+
24+
WaitDurationSamples []time.Duration
25+
WaitCountSamples []int64
26+
}
27+
28+
func (a *Advisor) Start(observeDuration time.Duration) {
29+
a.setup(observeDuration)
30+
31+
go func() {
32+
// after server start the db stats are going to be worse, so ignore that period
33+
time.Sleep(observeDuration)
34+
35+
a.Stats = a.StatsFunc()
36+
37+
for {
38+
time.Sleep(a.Interval)
39+
a.loop()
40+
}
41+
}()
42+
}
43+
44+
func (a *Advisor) setup(observeDuration time.Duration) {
45+
nSamples := int(math.Round(observeDuration.Seconds() / a.Interval.Seconds()))
46+
47+
a.WaitDurationSamples = make([]time.Duration, nSamples)
48+
a.WaitCountSamples = make([]int64, nSamples)
49+
}
50+
51+
func (a *Advisor) loop() {
52+
a.Iterations += 1
53+
if a.Iterations < 0 {
54+
a.Iterations = 0
55+
}
56+
57+
previousStats := a.Stats
58+
a.Stats = a.StatsFunc()
59+
60+
a.WaitDurationSamples[a.Iterations%len(a.WaitDurationSamples)] = a.Stats.WaitDuration - previousStats.WaitDuration
61+
a.WaitCountSamples[a.Iterations%len(a.WaitCountSamples)] = a.Stats.WaitCount - previousStats.WaitCount
62+
63+
advise := false
64+
65+
longWaitDurationSamples := 0
66+
if a.Iterations >= len(a.WaitDurationSamples) {
67+
for _, sample := range a.WaitDurationSamples {
68+
if sample >= a.Interval {
69+
longWaitDurationSamples += 1
70+
}
71+
}
72+
73+
// 1/3 of the observation time was spent waiting for over one sampling interval
74+
advise = longWaitDurationSamples >= (len(a.WaitDurationSamples) / 3)
75+
}
76+
77+
over2WaitingSamples := 0
78+
if !advise && a.Iterations >= len(a.WaitCountSamples) {
79+
for _, sample := range a.WaitCountSamples {
80+
if sample > 2 {
81+
over2WaitingSamples += 1
82+
}
83+
}
84+
85+
// 1/3 of the observation time we saw more than 2 goroutines waiting for a connection
86+
advise = over2WaitingSamples >= (len(a.WaitCountSamples) / 3)
87+
}
88+
89+
if advise && time.Since(a.LastAdvisedAt) >= time.Hour {
90+
a.LastAdvisedAt = time.Now()
91+
92+
a.AdviseFunc(Advisory{
93+
LongWaitDurationSamples: longWaitDurationSamples,
94+
Over2WaitingSamples: over2WaitingSamples,
95+
})
96+
97+
}
98+
}

internal/storage/advisor_test.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package storage
2+
3+
import (
4+
"database/sql"
5+
"testing"
6+
"time"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestAdvisorZeroChanges(t *testing.T) {
12+
advised := false
13+
14+
advisor := Advisor{
15+
AdviseFunc: func(advisory Advisory) {
16+
advised = true
17+
},
18+
StatsFunc: func() sql.DBStats {
19+
return sql.DBStats{}
20+
},
21+
Interval: time.Millisecond,
22+
}
23+
24+
advisor.setup(10 * time.Millisecond)
25+
26+
require.Len(t, advisor.WaitDurationSamples, 10)
27+
require.Len(t, advisor.WaitCountSamples, 10)
28+
29+
advisor.Stats = advisor.StatsFunc()
30+
31+
for i := 0; i < 11; i += 1 {
32+
advisor.loop()
33+
}
34+
35+
require.False(t, advised)
36+
}
37+
38+
func TestAdvisorWaitDuration(t *testing.T) {
39+
advised := 0
40+
41+
dbStats := sql.DBStats{}
42+
43+
advisor := Advisor{
44+
AdviseFunc: func(advisory Advisory) {
45+
advised += 1
46+
},
47+
StatsFunc: func() sql.DBStats {
48+
return dbStats
49+
},
50+
Interval: time.Millisecond,
51+
}
52+
53+
advisor.setup(10 * time.Millisecond)
54+
55+
require.Len(t, advisor.WaitDurationSamples, 10)
56+
require.Len(t, advisor.WaitCountSamples, 10)
57+
58+
advisor.Stats = advisor.StatsFunc()
59+
60+
for i := 0; i < 20; i += 1 {
61+
dbStats.WaitDuration += time.Millisecond
62+
advisor.loop()
63+
}
64+
65+
require.Equal(t, advised, 1)
66+
}
67+
68+
func TestAdvisorWaitCount(t *testing.T) {
69+
advised := 0
70+
71+
dbStats := sql.DBStats{}
72+
73+
advisor := Advisor{
74+
AdviseFunc: func(advisory Advisory) {
75+
advised += 1
76+
},
77+
StatsFunc: func() sql.DBStats {
78+
return dbStats
79+
},
80+
Interval: time.Millisecond,
81+
}
82+
83+
advisor.setup(10 * time.Millisecond)
84+
85+
require.Len(t, advisor.WaitDurationSamples, 10)
86+
require.Len(t, advisor.WaitCountSamples, 10)
87+
88+
advisor.Stats = advisor.StatsFunc()
89+
90+
for i := 0; i < 20; i += 1 {
91+
dbStats.WaitCount += 3
92+
advisor.loop()
93+
}
94+
95+
require.Equal(t, advised, 1)
96+
}

internal/storage/dial.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,13 @@ func Dial(config *conf.GlobalConfiguration) (*Connection, error) {
8383
}
8484

8585
if config.Metrics.Enabled {
86-
registerOpenTelemetryDatabaseStats(db)
86+
registerOpenTelemetryDatabaseStats(db, config)
8787
}
8888

8989
return &Connection{db}, nil
9090
}
9191

92-
func registerOpenTelemetryDatabaseStats(db *pop.Connection) {
92+
func registerOpenTelemetryDatabaseStats(db *pop.Connection, config *conf.GlobalConfiguration) {
9393
defer func() {
9494
if rec := recover(); rec != nil {
9595
logrus.WithField("error", rec).Error("registerOpenTelemetryDatabaseStats is not able to determine database object with reflection -- panicked")
@@ -111,6 +111,24 @@ func registerOpenTelemetryDatabaseStats(db *pop.Connection) {
111111
} else {
112112
logrus.Debug("registered OpenTelemetry stats metrics for database")
113113
}
114+
115+
if config.DB.Advisor.Enabled {
116+
advisor := Advisor{
117+
StatsFunc: func() sql.DBStats {
118+
return sqldb.Stats()
119+
},
120+
Interval: config.DB.Advisor.SamplingInterval,
121+
AdviseFunc: func(advisory Advisory) {
122+
logrus.WithFields(logrus.Fields{
123+
"component": "db.advisor",
124+
"long_wait_duration_samples": advisory.LongWaitDurationSamples,
125+
"over_2_waiting_samples": advisory.Over2WaitingSamples,
126+
}).Warn("Suboptimal database connection pool settings detected! Consider doubling the max DB pool size configuration")
127+
},
128+
}
129+
130+
advisor.Start(config.DB.Advisor.ObservationInterval)
131+
}
114132
}
115133

116134
type CommitWithError struct {

0 commit comments

Comments
 (0)