Skip to content

Commit ce0f3c8

Browse files
authored
Merge pull request #59 from fly-apps/disk-capacity-check
Toggle primary read-only when disk capacity hits threshold
2 parents 2dbd8c4 + ffe51d0 commit ce0f3c8

File tree

5 files changed

+206
-49
lines changed

5 files changed

+206
-49
lines changed

internal/flycheck/checks.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package flycheck
22

33
import (
44
"context"
5-
"fmt"
5+
"errors"
66
"io"
77
"net/http"
88
"time"
@@ -93,7 +93,7 @@ func handleCheckResponse(w http.ResponseWriter, suite *check.CheckSuite, raw boo
9393
result = suite.Result()
9494
}
9595
if !suite.Passed() {
96-
handleError(w, fmt.Errorf(result))
96+
handleError(w, errors.New(result))
9797
return
9898
}
9999
io.WriteString(w, result)

internal/flycheck/pg.go

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ import (
1010
"github.com/superfly/fly-checks/check"
1111
)
1212

13+
// Primary will be made read-only when disk capacity reaches this percentage.
14+
const diskCapacityPercentageThreshold = 90.0
15+
1316
// CheckPostgreSQL health, replication, etc
1417
func CheckPostgreSQL(ctx context.Context, checks *check.CheckSuite) (*check.CheckSuite, error) {
15-
1618
node, err := flypg.NewNode()
1719
if err != nil {
1820
return checks, errors.Wrap(err, "failed to initialize node")
@@ -23,18 +25,65 @@ func CheckPostgreSQL(ctx context.Context, checks *check.CheckSuite) (*check.Chec
2325
return checks, errors.Wrap(err, "failed to connect with local node")
2426
}
2527

28+
repConn, err := node.RepMgr.NewLocalConnection(ctx)
29+
if err != nil {
30+
return checks, fmt.Errorf("failed to connect to repmgr node: %s", err)
31+
}
32+
33+
member, err := node.RepMgr.Member(ctx, repConn)
34+
if err != nil {
35+
return checks, fmt.Errorf("failed to resolve local member role: %s", err)
36+
}
37+
2638
// Cleanup connections
2739
checks.OnCompletion = func() {
2840
localConn.Close(ctx)
41+
repConn.Close(ctx)
2942
}
3043

3144
checks.AddCheck("connections", func() (string, error) {
3245
return connectionCount(ctx, localConn)
3346
})
3447

48+
if member.Role == flypg.PrimaryRoleName && member.Active {
49+
// Check that provides additional insight into disk capacity and
50+
// how close we are to hitting the readonly threshold.
51+
checks.AddCheck("disk-capacity", func() (string, error) {
52+
return diskCapacityCheck(ctx, localConn, node)
53+
})
54+
}
55+
3556
return checks, nil
3657
}
3758

59+
func diskCapacityCheck(ctx context.Context, localConn *pgx.Conn, node *flypg.Node) (string, error) {
60+
// Calculate current disk usage
61+
size, available, err := diskUsage("/data/")
62+
if err != nil {
63+
return "", fmt.Errorf("failed to calculate disk usage: %s", err)
64+
}
65+
66+
usedPercentage := float64(size-available) / float64(size) * 100
67+
68+
// Turn primary read-only
69+
if usedPercentage > diskCapacityPercentageThreshold {
70+
if err := flypg.SetReadOnly(ctx, node, localConn); err != nil {
71+
return "", fmt.Errorf("failed to turn primary readonly: %s", err)
72+
}
73+
74+
return "", fmt.Errorf("%0.1f%% - readonly mode enabled, extend your volume to re-enable writes", usedPercentage)
75+
}
76+
77+
// Don't attempt to turn read/write if zombie lock exists.
78+
if !flypg.ZombieLockExists() {
79+
if err := flypg.UnsetReadOnly(ctx, node, localConn); err != nil {
80+
return "", fmt.Errorf("failed to turn primary read/write: %s", err)
81+
}
82+
}
83+
84+
return fmt.Sprintf("%0.1f%% - readonly mode will be enabled at %0.1f%%", usedPercentage, diskCapacityPercentageThreshold), nil
85+
}
86+
3887
func connectionCount(ctx context.Context, local *pgx.Conn) (string, error) {
3988
sql := `select used, res_for_super as reserved, max_conn as max from
4089
(select count(*) used from pg_stat_activity) q1,

internal/flypg/admin/admin.go

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -330,45 +330,3 @@ func GetSetting(ctx context.Context, pg *pgx.Conn, setting string) (*PGSetting,
330330
}
331331
return &out, nil
332332
}
333-
334-
func SetReadOnly(ctx context.Context, conn *pgx.Conn) error {
335-
databases, err := ListDatabases(ctx, conn)
336-
if err != nil {
337-
return err
338-
}
339-
340-
for _, db := range databases {
341-
if db.Name == "repmgr" || db.Name == "postgres" {
342-
continue
343-
}
344-
345-
sql := fmt.Sprintf("ALTER DATABASE %s set default_transaction_read_only = true;", db.Name)
346-
_, err := conn.Exec(ctx, sql)
347-
if err != nil {
348-
return err
349-
}
350-
}
351-
352-
return nil
353-
}
354-
355-
func UnsetReadOnly(ctx context.Context, conn *pgx.Conn) error {
356-
databases, err := ListDatabases(ctx, conn)
357-
if err != nil {
358-
return err
359-
}
360-
361-
for _, db := range databases {
362-
if db.Name == "repmgr" || db.Name == "postgres" {
363-
continue
364-
}
365-
366-
sql := fmt.Sprintf("ALTER DATABASE %s set default_transaction_read_only = false;", db.Name)
367-
_, err := conn.Exec(ctx, sql)
368-
if err != nil {
369-
return err
370-
}
371-
}
372-
373-
return nil
374-
}

internal/flypg/node.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ func (n *Node) PostInit(ctx context.Context) error {
417417
}
418418

419419
fmt.Println("Turning all user-created databases readonly.")
420-
if err := admin.SetReadOnly(ctx, conn); err != nil {
420+
if err := SetReadOnly(ctx, n, conn); err != nil {
421421
return fmt.Errorf("failed to set read-only: %s", err)
422422
}
423423

@@ -439,7 +439,7 @@ func (n *Node) PostInit(ctx context.Context) error {
439439
}
440440

441441
fmt.Println("Turning user-created databases read-only")
442-
if err := admin.SetReadOnly(ctx, conn); err != nil {
442+
if err := SetReadOnly(ctx, n, conn); err != nil {
443443
return fmt.Errorf("failed to set read-only: %s", err)
444444
}
445445

@@ -457,8 +457,11 @@ func (n *Node) PostInit(ctx context.Context) error {
457457
return fmt.Errorf("failed to reconfigure pgbouncer: %s", err)
458458
}
459459

460-
if err := admin.UnsetReadOnly(ctx, conn); err != nil {
461-
return fmt.Errorf("failed to unset read-only")
460+
// Readonly lock is set by healthchecks when disk capacity is dangerously high.
461+
if !ReadOnlyLockExists() {
462+
if err := UnsetReadOnly(ctx, n, conn); err != nil {
463+
return fmt.Errorf("failed to unset read-only: %s", err)
464+
}
462465
}
463466

464467
default:
@@ -498,6 +501,11 @@ func (n *Node) NewLocalConnection(ctx context.Context, database string) (*pgx.Co
498501
return openConnection(ctx, host, database, n.OperatorCredentials)
499502
}
500503

504+
func (n *Node) NewPrimaryConnection(ctx context.Context, database string) (*pgx.Conn, error) {
505+
host := net.JoinHostPort(n.PrivateIP, strconv.Itoa(n.PGBouncer.Port))
506+
return openConnection(ctx, host, database, n.OperatorCredentials)
507+
}
508+
501509
func (n *Node) initializePG() error {
502510
if n.isPGInitialized() {
503511
return nil

internal/flypg/readonly.go

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package flypg
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"time"
8+
9+
"github.com/fly-apps/postgres-flex/internal/flypg/admin"
10+
"github.com/jackc/pgx/v5"
11+
)
12+
13+
const (
14+
readOnlyLockFile = "/data/readonly.lock"
15+
readOnlyEnabled = "on"
16+
readOnlyDisabled = "off"
17+
)
18+
19+
func SetReadOnly(ctx context.Context, n *Node, conn *pgx.Conn) error {
20+
if err := writeReadOnlyLock(); err != nil {
21+
return fmt.Errorf("failed to set readonly lock: %s", err)
22+
}
23+
24+
databases, err := admin.ListDatabases(ctx, conn)
25+
if err != nil {
26+
return err
27+
}
28+
29+
for _, db := range databases {
30+
// exclude administrative dbs
31+
if db.Name == "repmgr" || db.Name == "postgres" {
32+
continue
33+
}
34+
35+
// Route configuration change through PGBouncer
36+
dbConn, err := n.NewPrimaryConnection(ctx, db.Name)
37+
if err != err {
38+
return fmt.Errorf("failed to establish connection to db %s: %s", db.Name, err)
39+
}
40+
defer dbConn.Close(ctx)
41+
42+
// Set readonly
43+
if _, err = dbConn.Exec(ctx, "SET default_transaction_read_only=true;"); err != nil {
44+
return fmt.Errorf("failed to set readonly on db %s: %s", db.Name, err)
45+
}
46+
47+
// Query configuration value and confirm the value change.
48+
var status string
49+
dbConn.QueryRow(ctx, "SHOW default_transaction_read_only;").Scan(&status)
50+
if err != nil {
51+
return fmt.Errorf("failed to verify readonly was unset: %s", err)
52+
}
53+
54+
if status == readOnlyDisabled {
55+
return fmt.Errorf("failed to turn database '%s' readonly", db.Name)
56+
}
57+
}
58+
59+
return nil
60+
}
61+
62+
func UnsetReadOnly(ctx context.Context, n *Node, conn *pgx.Conn) error {
63+
// Skip if there's no readonly lock present
64+
if !ReadOnlyLockExists() {
65+
return nil
66+
}
67+
68+
databases, err := admin.ListDatabases(ctx, conn)
69+
if err != nil {
70+
return err
71+
}
72+
73+
for _, db := range databases {
74+
// exclude administrative dbs
75+
if db.Name == "repmgr" || db.Name == "postgres" {
76+
continue
77+
}
78+
79+
// Route configuration change through PGBouncer
80+
dbConn, err := n.NewPrimaryConnection(ctx, db.Name)
81+
if err != err {
82+
return fmt.Errorf("failed to establish connection to db %s: %s", db.Name, err)
83+
}
84+
defer dbConn.Close(ctx)
85+
86+
// Disable readonly
87+
_, err = dbConn.Exec(ctx, "SET default_transaction_read_only=false;")
88+
if err != nil {
89+
return fmt.Errorf("failed to unset readonly on db %s: %s", db.Name, err)
90+
}
91+
92+
// Query configuration value and confirm the value change.
93+
var status string
94+
dbConn.QueryRow(ctx, "SHOW default_transaction_read_only;").Scan(&status)
95+
if err != nil {
96+
return fmt.Errorf("failed to verify readonly was unset: %s", err)
97+
}
98+
99+
if status == readOnlyEnabled {
100+
return fmt.Errorf("failed to turn database '%s' read/write : %s", db.Name, err)
101+
}
102+
}
103+
104+
if err := removeReadOnlyLock(); err != nil {
105+
return fmt.Errorf("failed to remove readonly lock: %s", err)
106+
}
107+
108+
return nil
109+
}
110+
111+
func ReadOnlyLockExists() bool {
112+
_, err := os.Stat(readOnlyLockFile)
113+
if os.IsNotExist(err) {
114+
return false
115+
}
116+
117+
return true
118+
}
119+
120+
func writeReadOnlyLock() error {
121+
if ReadOnlyLockExists() {
122+
return nil
123+
}
124+
125+
if err := os.WriteFile(readOnlyLockFile, []byte(time.Now().String()), 0644); err != nil {
126+
return err
127+
}
128+
129+
return nil
130+
}
131+
132+
func removeReadOnlyLock() error {
133+
if !ReadOnlyLockExists() {
134+
return nil
135+
}
136+
137+
if err := os.Remove(readOnlyLockFile); err != nil {
138+
return err
139+
}
140+
141+
return nil
142+
}

0 commit comments

Comments
 (0)