Skip to content

Commit d6e9d6b

Browse files
committed
K8SPSMDB-1296: improve readiness probe
https://perconadev.atlassian.net/browse/K8SPSMDB-1296
1 parent be033fb commit d6e9d6b

File tree

183 files changed

+1168
-75
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

183 files changed

+1168
-75
lines changed

cmd/mongodb-healthcheck/db/db.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,27 @@ var (
2929
)
3030

3131
func Dial(ctx context.Context, conf *Config) (mongo.Client, error) {
32-
if err := conf.configureTLS(); err != nil {
32+
log := logf.FromContext(ctx).WithName("Dial")
33+
ctx = logf.IntoContext(ctx, log)
34+
35+
if err := conf.configureTLS(ctx); err != nil {
3336
return nil, errors.Wrap(err, "configure TLS")
3437
}
3538

36-
log := logf.FromContext(ctx)
3739
log.V(1).Info("Connecting to mongodb", "hosts", conf.Hosts, "ssl", conf.SSL.Enabled, "ssl_insecure", conf.SSL.Insecure)
3840

3941
if conf.Username != "" && conf.Password != "" {
4042
log.V(1).Info("Enabling authentication for session", "user", conf.Username)
4143
}
4244

43-
cl, err := mongo.Dial(&conf.Config)
45+
cl, err := mongo.Dial(ctx, &conf.Config)
4446
if err != nil {
4547
cfg := conf.Config
4648
cfg.Direct = true
4749
cfg.ReplSetName = ""
48-
cl, err = mongo.Dial(&cfg)
50+
cl, err = mongo.Dial(ctx, &cfg)
4951
if err != nil {
50-
return nil, errors.Wrap(err, "filed to dial mongo")
52+
return nil, errors.Wrap(err, "failed to dial mongo")
5153
}
5254
}
5355

cmd/mongodb-healthcheck/db/ssl.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package db
1616

1717
import (
18+
"context"
1819
"crypto/tls"
1920
"crypto/x509"
2021
"os"
@@ -40,8 +41,8 @@ func (sc *SSLConfig) loadCaCertificate() (*x509.CertPool, error) {
4041
return certificates, nil
4142
}
4243

43-
func (cnf *Config) configureTLS() error {
44-
log := logf.Log
44+
func (cnf *Config) configureTLS(ctx context.Context) error {
45+
log := logf.FromContext(ctx).WithName("configureTLS")
4546

4647
if !cnf.SSL.Enabled {
4748
return nil
@@ -72,7 +73,7 @@ func (cnf *Config) configureTLS() error {
7273
return errors.Wrapf(err, "check if file with name %s exists", cnf.SSL.CAFile)
7374
}
7475

75-
log.V(1).Info("Loading SSL/TLS Certificate Authority: %s", "ca", cnf.SSL.CAFile)
76+
log.V(1).Info("Loading SSL/TLS Certificate Authority", "ca", cnf.SSL.CAFile)
7677
ca, err := cnf.SSL.loadCaCertificate()
7778
if err != nil {
7879
return errors.Wrapf(err, "load client CAs from %s", cnf.SSL.CAFile)

cmd/mongodb-healthcheck/db/ssl_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ func TestSSLNotEnabled(t *testing.T) {
1616
},
1717
}
1818

19-
if err := cfg.configureTLS(); err != nil {
19+
if err := cfg.configureTLS(t.Context()); err != nil {
2020
t.Fatalf("TLS configuration failed: %s", err)
2121
}
2222

@@ -32,7 +32,7 @@ func TestSSLEnabled(t *testing.T) {
3232
},
3333
}
3434

35-
if err := cfg.configureTLS(); err != nil {
35+
if err := cfg.configureTLS(t.Context()); err != nil {
3636
t.Fatalf("TLS configuration failed: %s", err)
3737
}
3838

@@ -49,7 +49,7 @@ func TestPEMKeyFileDoesNotExists(t *testing.T) {
4949
},
5050
}
5151

52-
err := cfg.configureTLS()
52+
err := cfg.configureTLS(t.Context())
5353
if err == nil {
5454
t.Fatal("Expected TLS config to fail, but it returned no error")
5555
}
@@ -71,7 +71,7 @@ func TestCAFileDoesNotExists(t *testing.T) {
7171
},
7272
}
7373

74-
err := cfg.configureTLS()
74+
err := cfg.configureTLS(t.Context())
7575
if err == nil {
7676
t.Fatal("Expected TLS config to fail, but it returned no error")
7777
}

cmd/mongodb-healthcheck/healthcheck/health.go

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,9 @@ package healthcheck
1616

1717
import (
1818
"context"
19-
"encoding/json"
2019

21-
v "github.com/hashicorp/go-version"
2220
"github.com/pkg/errors"
2321
"go.mongodb.org/mongo-driver/bson"
24-
"go.mongodb.org/mongo-driver/bson/primitive"
2522
logf "sigs.k8s.io/controller-runtime/pkg/log"
2623

2724
"github.com/percona/percona-server-mongodb-operator/cmd/mongodb-healthcheck/db"
@@ -32,6 +29,7 @@ var ErrNoReplsetConfigStr = "(NotYetInitialized) no replset config has been rece
3229

3330
func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error) {
3431
log := logf.FromContext(ctx).WithName("HealthCheckMongosLiveness")
32+
ctx = logf.IntoContext(ctx, log)
3533

3634
client, err := db.Dial(ctx, cnf)
3735
if err != nil {
@@ -58,6 +56,7 @@ func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error)
5856

5957
func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelaySeconds int64) (_ *mongo.MemberState, err error) {
6058
log := logf.FromContext(ctx).WithName("HealthCheckMongodLiveness")
59+
ctx = logf.IntoContext(ctx, log)
6160

6261
client, err := db.Dial(ctx, cnf)
6362
if err != nil {
@@ -74,50 +73,14 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay
7473
return nil, errors.Wrap(err, "get isMaster response")
7574
}
7675

77-
buildInfo, err := client.RSBuildInfo(ctx)
76+
rsStatus, err := getStatus(ctx, client)
7877
if err != nil {
79-
return nil, errors.Wrap(err, "get buildInfo response")
80-
}
81-
82-
replSetStatusCommand := bson.D{{Key: "replSetGetStatus", Value: 1}}
83-
mongoVersion := v.Must(v.NewVersion(buildInfo.Version))
84-
if mongoVersion.Compare(v.Must(v.NewVersion("4.2.1"))) < 0 {
85-
// https://docs.mongodb.com/manual/reference/command/replSetGetStatus/#syntax
86-
replSetStatusCommand = append(replSetStatusCommand, primitive.E{Key: "initialSync", Value: 1})
87-
}
88-
89-
res := client.Database("admin").RunCommand(ctx, replSetStatusCommand)
90-
if res.Err() != nil {
91-
// if we come this far, it means db connection was successful
92-
// standalone mongod nodes in an unmanaged cluster doesn't need
93-
// to die before they added to a replset
94-
if res.Err().Error() == ErrNoReplsetConfigStr {
78+
if err.Error() == ErrNoReplsetConfigStr {
9579
state := mongo.MemberStateUnknown
96-
log.V(1).Info("replSetGetStatus failed", "err", res.Err().Error(), "state", state)
80+
log.V(1).Info("replSetGetStatus failed", "err", err.Error(), "state", state)
9781
return &state, nil
9882
}
99-
return nil, errors.Wrap(res.Err(), "get replsetGetStatus response")
100-
}
101-
102-
// this is a workaround to fix decoding of empty interfaces
103-
// https://jira.mongodb.org/browse/GODRIVER-988
104-
rsStatus := ReplSetStatus{}
105-
tempResult := bson.M{}
106-
err = res.Decode(&tempResult)
107-
if err != nil {
108-
return nil, errors.Wrap(err, "decode replsetGetStatus response")
109-
}
110-
111-
if err == nil {
112-
result, err := json.Marshal(tempResult)
113-
if err != nil {
114-
return nil, errors.Wrap(err, "marshal temp result")
115-
}
116-
117-
err = json.Unmarshal(result, &rsStatus)
118-
if err != nil {
119-
return nil, errors.Wrap(err, "unmarshal temp result")
120-
}
83+
return nil, errors.Wrap(err, "get replSetGetStatus response")
12184
}
12285

12386
oplogRs := OplogRs{}

cmd/mongodb-healthcheck/healthcheck/readiness.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package healthcheck
1717
import (
1818
"context"
1919
"net"
20+
"time"
2021

2122
"github.com/pkg/errors"
2223
"go.mongodb.org/mongo-driver/bson"
@@ -27,21 +28,50 @@ import (
2728
)
2829

2930
// MongodReadinessCheck runs a ping on a pmgo.SessionManager to check server readiness
30-
func MongodReadinessCheck(ctx context.Context, addr string) error {
31+
func MongodReadinessCheck(ctx context.Context, cnf *db.Config) error {
3132
log := logf.FromContext(ctx).WithName("MongodReadinessCheck")
33+
ctx = logf.IntoContext(ctx, log)
3234

3335
var d net.Dialer
3436

37+
addr := cnf.Hosts[0]
3538
log.V(1).Info("Connecting to " + addr)
3639
conn, err := d.DialContext(ctx, "tcp", addr)
3740
if err != nil {
3841
return errors.Wrap(err, "dial")
3942
}
40-
return conn.Close()
43+
if err := conn.Close(); err != nil {
44+
return err
45+
}
46+
47+
s, err := func() (ReplSetStatus, error) {
48+
cnf.Timeout = time.Second
49+
client, err := db.Dial(ctx, cnf)
50+
if err != nil {
51+
return ReplSetStatus{}, errors.Wrap(err, "connection error")
52+
}
53+
defer func() {
54+
if derr := client.Disconnect(ctx); derr != nil && err == nil {
55+
err = errors.Wrap(derr, "failed to disconnect")
56+
}
57+
}()
58+
return getStatus(ctx, client)
59+
}()
60+
if err != nil {
61+
log.Error(err, "Failed to get replset status")
62+
return nil
63+
}
64+
65+
if err := CheckState(s, 0, 0); err != nil {
66+
return errors.Wrap(err, "check state")
67+
}
68+
69+
return nil
4170
}
4271

4372
func MongosReadinessCheck(ctx context.Context, cnf *db.Config) (err error) {
4473
log := logf.FromContext(ctx).WithName("MongosReadinessCheck")
74+
ctx = logf.IntoContext(ctx, log)
4575

4676
client, err := db.Dial(ctx, cnf)
4777
if err != nil {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package healthcheck
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
7+
v "github.com/hashicorp/go-version"
8+
"github.com/pkg/errors"
9+
"go.mongodb.org/mongo-driver/bson"
10+
"go.mongodb.org/mongo-driver/bson/primitive"
11+
12+
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
13+
)
14+
15+
func getStatus(ctx context.Context, client mongo.Client) (ReplSetStatus, error) {
16+
buildInfo, err := client.RSBuildInfo(ctx)
17+
if err != nil {
18+
return ReplSetStatus{}, errors.Wrap(err, "get buildInfo response")
19+
}
20+
21+
replSetStatusCommand := bson.D{{Key: "replSetGetStatus", Value: 1}}
22+
mongoVersion := v.Must(v.NewVersion(buildInfo.Version))
23+
if mongoVersion.Compare(v.Must(v.NewVersion("4.2.1"))) < 0 {
24+
// https://docs.mongodb.com/manual/reference/command/replSetGetStatus/#syntax
25+
replSetStatusCommand = append(replSetStatusCommand, primitive.E{Key: "initialSync", Value: 1})
26+
}
27+
28+
res := client.Database("admin").RunCommand(ctx, replSetStatusCommand)
29+
if res.Err() != nil {
30+
if res.Err().Error() == ErrNoReplsetConfigStr {
31+
return ReplSetStatus{}, errors.New(ErrNoReplsetConfigStr)
32+
}
33+
return ReplSetStatus{}, errors.Wrap(res.Err(), "get replsetGetStatus response")
34+
}
35+
36+
// this is a workaround to fix decoding of empty interfaces
37+
// https://jira.mongodb.org/browse/GODRIVER-988
38+
rsStatus := ReplSetStatus{}
39+
tempResult := bson.M{}
40+
if err := res.Decode(&tempResult); err != nil {
41+
return ReplSetStatus{}, errors.Wrap(err, "decode replsetGetStatus response")
42+
}
43+
result, err := json.Marshal(tempResult)
44+
if err != nil {
45+
return ReplSetStatus{}, errors.Wrap(err, "marshal temp result")
46+
}
47+
if err = json.Unmarshal(result, &rsStatus); err != nil {
48+
return ReplSetStatus{}, errors.Wrap(err, "unmarshal temp result")
49+
}
50+
return rsStatus, nil
51+
}

cmd/mongodb-healthcheck/tool/tool.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ func (app *App) Run(ctx context.Context) error {
111111
switch *component {
112112

113113
case "mongod":
114-
err := healthcheck.MongodReadinessCheck(ctx, cnf.Hosts[0])
114+
err := healthcheck.MongodReadinessCheck(ctx, cnf)
115115
if err != nil {
116116
return errors.Wrap(err, "member failed Kubernetes readiness check")
117117
}

e2e-tests/arbiter/compare/statefulset_arbiter-clusterip-rs0-arbiter-oc.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,12 @@ spec:
113113
- readiness
114114
- --component
115115
- mongod
116+
- --ssl
117+
- --sslInsecure
118+
- --sslCAFile
119+
- /etc/mongodb-ssl/ca.crt
120+
- --sslPEMKeyFile
121+
- /tmp/tls.pem
116122
failureThreshold: 8
117123
initialDelaySeconds: 10
118124
periodSeconds: 3

e2e-tests/arbiter/compare/statefulset_arbiter-clusterip-rs0-arbiter.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ spec:
107107
- readiness
108108
- --component
109109
- mongod
110+
- --ssl
111+
- --sslInsecure
112+
- --sslCAFile
113+
- /etc/mongodb-ssl/ca.crt
114+
- --sslPEMKeyFile
115+
- /tmp/tls.pem
110116
failureThreshold: 8
111117
initialDelaySeconds: 10
112118
periodSeconds: 3

e2e-tests/arbiter/compare/statefulset_arbiter-rs0-arbiter-oc.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ spec:
114114
- readiness
115115
- --component
116116
- mongod
117+
- --ssl
118+
- --sslInsecure
119+
- --sslCAFile
120+
- /etc/mongodb-ssl/ca.crt
121+
- --sslPEMKeyFile
122+
- /tmp/tls.pem
117123
failureThreshold: 8
118124
initialDelaySeconds: 10
119125
periodSeconds: 3

0 commit comments

Comments
 (0)