Skip to content

Commit 25c7515

Browse files
authored
Threadsafe logging (#148)
* Add logging that's threadsafe * Cleanup * Removing info tags * Updates
1 parent 93d0c3a commit 25c7515

File tree

11 files changed

+50
-41
lines changed

11 files changed

+50
-41
lines changed

cmd/start/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package main
33
import (
44
"context"
55
"fmt"
6+
"log"
67
"os"
78
"syscall"
89
"time"
@@ -12,6 +13,8 @@ import (
1213
)
1314

1415
func main() {
16+
log.SetFlags(0)
17+
1518
requiredPasswords := []string{"SU_PASSWORD", "OPERATOR_PASSWORD", "REPL_PASSWORD"}
1619
for _, str := range requiredPasswords {
1720
if _, exists := os.LookupEnv(str); !exists {

internal/api/response.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ func status(err error) int {
3535
var pgErr *pgconn.PgError
3636

3737
if errors.As(err, &pgErr) {
38-
// fmt.Printf("%s: %s\n", pgErr.Code, pgErr.Message)
3938
switch pgErr.Code {
4039
case "42710": // unique violation
4140
return http.StatusConflict

internal/flycheck/pg.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package flycheck
33
import (
44
"context"
55
"fmt"
6+
"log"
67

78
"github.com/fly-apps/postgres-flex/internal/flypg"
89
"github.com/jackc/pgx/v5"
@@ -69,9 +70,9 @@ func diskCapacityCheck(ctx context.Context, node *flypg.Node) (string, error) {
6970
// If the read-only lock has already been set, we can assume that we've already
7071
// broadcasted.
7172
if !flypg.ReadOnlyLockExists() {
72-
fmt.Println("Broadcasting readonly change to registered standbys")
73+
log.Println("[WARN] Broadcasting readonly change to registered standbys")
7374
if err := flypg.BroadcastReadonlyChange(ctx, node, true); err != nil {
74-
fmt.Printf("errors with enable readonly broadcast: %s\n", err)
75+
log.Printf("[ERROR] Failed to enable readonly: %s\n", err)
7576
}
7677
}
7778

@@ -81,7 +82,7 @@ func diskCapacityCheck(ctx context.Context, node *flypg.Node) (string, error) {
8182
// Don't attempt to disable readonly if there's a zombie.lock
8283
if !flypg.ZombieLockExists() && flypg.ReadOnlyLockExists() {
8384
if err := flypg.BroadcastReadonlyChange(ctx, node, false); err != nil {
84-
fmt.Printf("errors with disable readonly broadcast: %s\n", err)
85+
log.Printf("[ERROR] Failed to disable readonly: %s\n", err)
8586
}
8687
}
8788

internal/flypg/node.go

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"log"
78
"net"
89
"os"
910
"os/exec"
@@ -163,7 +164,7 @@ func (n *Node) Init(ctx context.Context) error {
163164
}
164165

165166
if !clusterInitialized {
166-
fmt.Println("Provisioning primary")
167+
log.Println("Provisioning primary")
167168
// TODO - This should probably run on boot in case the password changes.
168169
if err := n.PGConfig.writePasswordFile(n.OperatorCredentials.Password); err != nil {
169170
return fmt.Errorf("failed to write pg password file: %s", err)
@@ -173,7 +174,7 @@ func (n *Node) Init(ctx context.Context) error {
173174
return fmt.Errorf("failed to initialize postgres %s", err)
174175
}
175176
} else {
176-
fmt.Println("Provisioning standby")
177+
log.Println("Provisioning standby")
177178
cloneTarget, err := n.RepMgr.ResolveMemberOverDNS(ctx)
178179
if err != nil {
179180
return fmt.Errorf("failed to resolve member over dns: %s", err)
@@ -182,7 +183,7 @@ func (n *Node) Init(ctx context.Context) error {
182183
if err := n.RepMgr.clonePrimary(cloneTarget.Hostname); err != nil {
183184
// Clean-up the directory so it can be retried.
184185
if rErr := os.Remove(n.DataDir); rErr != nil {
185-
fmt.Printf("failed to cleanup postgresql dir after clone error: %s\n", rErr)
186+
log.Printf("[ERROR] failed to cleanup postgresql dir after clone error: %s\n", rErr)
186187
}
187188

188189
return fmt.Errorf("failed to clone primary: %s", err)
@@ -208,8 +209,8 @@ func (n *Node) Init(ctx context.Context) error {
208209
// PostInit are operations that need to be executed against a running Postgres on boot.
209210
func (n *Node) PostInit(ctx context.Context) error {
210211
if ZombieLockExists() {
211-
fmt.Println("Manual intervention required. Delete the zombie.lock file and restart the machine to force a retry.")
212-
fmt.Println("Sleeping for 5 minutes.")
212+
log.Println("[ERROR] Manual intervention required. Delete the zombie.lock file and restart the machine to force a retry.")
213+
log.Println("[ERROR] Sleeping for 5 minutes.")
213214
time.Sleep(5 * time.Minute)
214215
return fmt.Errorf("unrecoverable zombie")
215216
}
@@ -219,8 +220,8 @@ func (n *Node) PostInit(ctx context.Context) error {
219220
if err != nil {
220221
// Check to see if this is an authentication error.
221222
if strings.Contains(err.Error(), "28P01") {
222-
fmt.Println("WARNING: `postgres` user password does not match the `OPERATOR_PASSWORD` secret")
223-
fmt.Printf("HINT: Use `fly secrets set OPERATOR_PASSWORD=<password> --app %s` to resolve the issue\n", n.AppName)
223+
log.Println("[WARN] `postgres` user password does not match the `OPERATOR_PASSWORD` secret")
224+
log.Printf("[WARN] Use `fly secrets set OPERATOR_PASSWORD=<password> --app %s` to resolve the issue\n", n.AppName)
224225
}
225226

226227
return fmt.Errorf("failed to establish connection to local node: %s", err)
@@ -251,13 +252,13 @@ func (n *Node) PostInit(ctx context.Context) error {
251252
// Verify cluster state to ensure we are the actual primary and not a zombie.
252253
primary, err := PerformScreening(ctx, conn, n)
253254
if errors.Is(err, ErrZombieDiagnosisUndecided) {
254-
fmt.Println("Unable to confirm that we are the true primary!")
255+
log.Println("[ERROR] Unable to confirm that we are the true primary!")
255256
// Turn member read-only
256257
if err := Quarantine(ctx, n, primary); err != nil {
257258
return fmt.Errorf("failed to quarantine failed primary: %s", err)
258259
}
259260
} else if errors.Is(err, ErrZombieDiscovered) {
260-
fmt.Printf("The majority of registered members agree that '%s' is the real primary.\n", primary)
261+
log.Printf("[ERROR] The majority of registered members agree that '%s' is the real primary.\n", primary)
261262
// Turn member read-only
262263
if err := Quarantine(ctx, n, primary); err != nil {
263264
return fmt.Errorf("failed to quarantine failed primary: %s", err)
@@ -285,7 +286,7 @@ func (n *Node) PostInit(ctx context.Context) error {
285286
case StandbyRoleName:
286287
// Register existing standby to take-on any configuration changes.
287288
if err := n.RepMgr.registerStandby(); err != nil {
288-
fmt.Printf("failed to register existing standby: %s\n", err)
289+
return fmt.Errorf("failed to register existing standby: %s", err)
289290
}
290291
default:
291292
return fmt.Errorf("member has unknown role: %q", member.Role)
@@ -312,7 +313,7 @@ func (n *Node) PostInit(ctx context.Context) error {
312313

313314
if !clusterInitialized {
314315
// Configure as primary
315-
fmt.Println("Registering primary")
316+
log.Println("Registering primary")
316317

317318
// Verify we reside within the clusters primary region
318319
if !n.RepMgr.eligiblePrimary() {
@@ -349,9 +350,9 @@ func (n *Node) PostInit(ctx context.Context) error {
349350
}
350351
} else {
351352
// Configure as standby
352-
fmt.Println("Registering standby")
353+
log.Println("Registering standby")
353354
if err := n.RepMgr.registerStandby(); err != nil {
354-
fmt.Printf("failed to register new standby: %s\n", err)
355+
return fmt.Errorf("failed to register new standby: %s", err)
355356
}
356357

357358
// Let the boot process know that we've already been configured.

internal/flypg/pg.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/json"
66
"fmt"
77
"io"
8+
"log"
89
"os"
910
"strconv"
1011
"strings"
@@ -177,7 +178,7 @@ func (c *PGConfig) SetDefaults() error {
177178
func (c *PGConfig) RuntimeApply(ctx context.Context, conn *pgx.Conn) error {
178179
for key, value := range c.userConfig {
179180
if err := admin.SetConfigurationSetting(ctx, conn, key, value); err != nil {
180-
fmt.Printf("failed to set configuration setting %s -> %s: %s", key, value, err)
181+
log.Printf("[WARN] Failed to set configuration setting %s -> %s: %s", key, value, err)
181182
}
182183
}
183184

@@ -230,8 +231,8 @@ func (c *PGConfig) initialize(store *state.Store) error {
230231
}
231232

232233
if err := SyncUserConfig(c, store); err != nil {
233-
fmt.Printf("WARNING: Failed to sync user config from consul for postgres: %s\n", err.Error())
234-
fmt.Println(" This may cause this node to behave unexpectedly")
234+
log.Printf("[WARN] Failed to sync user config from consul for postgres: %s\n", err.Error())
235+
log.Println("[WARN] This may cause this node to behave unexpectedly")
235236
if err := writeInternalConfigFile(c); err != nil {
236237
return fmt.Errorf("failed to write pg config files: %s", err)
237238
}

internal/flypg/readonly.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package flypg
33
import (
44
"context"
55
"fmt"
6+
"log"
67
"net/http"
78
"os"
89
"time"
@@ -72,13 +73,13 @@ func BroadcastReadonlyChange(ctx context.Context, n *Node, enabled bool) error {
7273
endpoint := fmt.Sprintf("http://[%s]:5500/%s", member.Hostname, target)
7374
resp, err := http.Get(endpoint)
7475
if err != nil {
75-
fmt.Printf("failed to broadcast readonly state change to member %s: %s", member.Hostname, err)
76+
log.Printf("[WARN] Failed to broadcast readonly state change to member %s: %s", member.Hostname, err)
7677
continue
7778
}
7879
defer func() { _ = resp.Body.Close() }()
7980

8081
if resp.StatusCode > 299 {
81-
fmt.Printf("failed to broadcast readonly state change to member %s: %d\n", member.Hostname, resp.StatusCode)
82+
log.Printf("[WARN] Failed to broadcast readonly state change to member %s: %d\n", member.Hostname, resp.StatusCode)
8283
}
8384
}
8485
}
@@ -87,13 +88,13 @@ func BroadcastReadonlyChange(ctx context.Context, n *Node, enabled bool) error {
8788
endpoint := fmt.Sprintf("http://[%s]:5500/%s", member.Hostname, RestartHaproxyEndpoint)
8889
resp, err := http.Get(endpoint)
8990
if err != nil {
90-
fmt.Printf("failed restart haproxy on member %s: %s", member.Hostname, err)
91+
log.Printf("[WARN] Failed restart haproxy on member %s: %s", member.Hostname, err)
9192
continue
9293
}
9394
defer func() { _ = resp.Body.Close() }()
9495

9596
if resp.StatusCode > 299 {
96-
fmt.Printf("failed to restart haproxy on member %s: %d\n", member.Hostname, resp.StatusCode)
97+
log.Printf("[WARN] Failed to restart haproxy on member %s: %d\n", member.Hostname, resp.StatusCode)
9798
}
9899
}
99100

@@ -115,7 +116,7 @@ func writeReadOnlyLock() error {
115116
}
116117

117118
if err := os.WriteFile(readOnlyLockFile, []byte(time.Now().String()), 0600); err != nil {
118-
return err
119+
return fmt.Errorf("failed to create readonly.lock: %s", err)
119120
}
120121

121122
if err := utils.SetFileOwnership(readOnlyLockFile, "postgres"); err != nil {

internal/flypg/registration.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"log"
78
"os"
89

910
"github.com/fly-apps/postgres-flex/internal/flypg/admin"
@@ -53,7 +54,7 @@ func isRegistered(ctx context.Context, conn *pgx.Conn, n *Node) (bool, error) {
5354
// If we are active, issue registration certificate
5455
if member.Active {
5556
if err := issueRegistrationCert(); err != nil {
56-
fmt.Println("failed to issue registration certificate.")
57+
log.Println("[WARN] Failed to issue registration certificate.")
5758
return true, nil
5859
}
5960
}

internal/flypg/repmgr.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"crypto/rand"
66
"fmt"
7+
"log"
78
"math"
89
"math/big"
910
"net"
@@ -235,7 +236,7 @@ func (r *RepMgr) rejoinCluster(hostname string) error {
235236
r.DatabaseName,
236237
)
237238

238-
fmt.Println(cmdStr)
239+
log.Println(cmdStr)
239240
_, err := utils.RunCommand(cmdStr, "postgres")
240241

241242
return err
@@ -245,7 +246,7 @@ func (r *RepMgr) registerStandby() error {
245246
// Force re-registry to ensure the standby picks up any new configuration changes.
246247
cmdStr := fmt.Sprintf("repmgr -f %s standby register -F", r.ConfigPath)
247248
if _, err := utils.RunCommand(cmdStr, "postgres"); err != nil {
248-
fmt.Printf("failed to register standby: %s", err)
249+
return err
249250
}
250251

251252
return nil
@@ -254,7 +255,7 @@ func (r *RepMgr) registerStandby() error {
254255
func (r *RepMgr) unregisterStandby(id int) error {
255256
cmdStr := fmt.Sprintf("repmgr standby unregister -f %s --node-id=%d", r.ConfigPath, id)
256257
if _, err := utils.RunCommand(cmdStr, "postgres"); err != nil {
257-
fmt.Printf("failed to unregister standby: %s", err)
258+
return err
258259
}
259260

260261
return nil
@@ -273,7 +274,7 @@ func (r *RepMgr) clonePrimary(ipStr string) error {
273274
r.Credentials.Username,
274275
r.ConfigPath)
275276

276-
fmt.Println(cmdStr)
277+
log.Println(cmdStr)
277278
if _, err := utils.RunCommand(cmdStr, "postgres"); err != nil {
278279
return fmt.Errorf("failed to clone primary: %s", err)
279280
}
@@ -397,7 +398,6 @@ func (r *RepMgr) ResolveMemberOverDNS(ctx context.Context) (*Member, error) {
397398

398399
member, err := r.MemberByHostname(ctx, conn, ip.String())
399400
if err != nil {
400-
fmt.Printf("failed to resolve role from %s", ip.String())
401401
continue
402402
}
403403

internal/flypg/restore.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func Restore(ctx context.Context, node *Node) error {
5252

5353
go func() {
5454
if err := svisor.Run(); err != nil {
55-
log.Printf("failed to boot postgres in the background: %s", err)
55+
log.Printf("[ERROR] failed to boot postgres in the background: %s", err)
5656
}
5757
}()
5858

internal/flypg/zombie.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"log"
78
"net"
89
"os"
910

@@ -68,7 +69,7 @@ func PerformScreening(ctx context.Context, conn *pgx.Conn, n *Node) (string, err
6869
return "", fmt.Errorf("failed to evaluate cluster data: %s", err)
6970
}
7071

71-
fmt.Println(DNASampleString(sample))
72+
log.Println(DNASampleString(sample))
7273

7374
return ZombieDiagnosis(sample)
7475
}
@@ -96,7 +97,7 @@ func TakeDNASample(ctx context.Context, node *Node, standbys []Member) (*DNASamp
9697
// Check for connectivity
9798
mConn, err := node.RepMgr.NewRemoteConnection(ctx, standby.Hostname)
9899
if err != nil {
99-
fmt.Printf("failed to connect to %s\n", standby.Hostname)
100+
log.Printf("[WARN] Failed to connect to %s\n", standby.Hostname)
100101
sample.totalInactive++
101102
continue
102103
}
@@ -105,7 +106,7 @@ func TakeDNASample(ctx context.Context, node *Node, standbys []Member) (*DNASamp
105106
// Verify the primary
106107
primary, err := node.RepMgr.PrimaryMember(ctx, mConn)
107108
if err != nil {
108-
fmt.Printf("failed to resolve primary from standby %s\n", standby.Hostname)
109+
log.Printf("[WARN] Failed to resolve primary from standby %s\n", standby.Hostname)
109110
sample.totalInactive++
110111
continue
111112
}
@@ -190,7 +191,7 @@ func DNASampleString(s *DNASample) string {
190191
}
191192

192193
func handleZombieLock(ctx context.Context, n *Node) error {
193-
fmt.Println("Zombie lock detected!")
194+
log.Println("[WARN] Zombie lock detected!")
194195
primaryStr, err := ReadZombieLock()
195196
if err != nil {
196197
return fmt.Errorf("failed to read zombie lock: %s", primaryStr)
@@ -227,7 +228,7 @@ func handleZombieLock(ctx context.Context, n *Node) error {
227228

228229
// If the primary does not reside within our primary region, we cannot rejoin until it is.
229230
if primary.Region != n.PrimaryRegion {
230-
fmt.Printf("Primary region mismatch detected. The primary lives in '%s', while PRIMARY_REGION is set to '%s'\n", primary.Region, n.PrimaryRegion)
231+
log.Printf("[WARN] Primary region mismatch detected. The primary lives in '%s', while PRIMARY_REGION is set to '%s'\n", primary.Region, n.PrimaryRegion)
231232
return ErrZombieLockRegionMismatch
232233
}
233234

@@ -248,8 +249,8 @@ func handleZombieLock(ctx context.Context, n *Node) error {
248249
}
249250
} else {
250251
// TODO - Provide link to documention on how to address this
251-
fmt.Println("Zombie lock file does not contain a hostname.")
252-
fmt.Println("This likely means that we were unable to determine who the real primary is.")
252+
log.Println("[WARN] Zombie lock file does not contain a hostname.")
253+
log.Println("[WARN] This likely means that we were unable to determine who the real primary is.")
253254
}
254255

255256
return nil

0 commit comments

Comments
 (0)