Skip to content

Commit 99697e9

Browse files
authored
PBM-887: add diagnostic command (#1043)
1 parent ecb9f2c commit 99697e9

File tree

5 files changed

+330
-0
lines changed

5 files changed

+330
-0
lines changed

cmd/pbm/diagnostic.go

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"os"
8+
"path/filepath"
9+
10+
"go.mongodb.org/mongo-driver/bson"
11+
12+
"github.com/percona/percona-backup-mongodb/pbm/errors"
13+
"github.com/percona/percona-backup-mongodb/sdk"
14+
)
15+
16+
// diagnosticOptions holds the command-line flags of the "diagnostic" command.
type diagnosticOptions struct {
	// path is the directory the diagnostic files are saved to (--path).
	path string
	// opid is the OPID/command ID to report on (--opid).
	opid string
	// name is a backup or restore name (--name); handleDiagnostic resolves it
	// to an OPID when opid is empty.
	name string
}
21+
22+
func handleDiagnostic(
23+
ctx context.Context,
24+
pbm *sdk.Client,
25+
opts diagnosticOptions,
26+
) (fmt.Stringer, error) {
27+
if opts.opid == "" && opts.name == "" {
28+
return nil, errors.New("--opid or --name must be provided")
29+
}
30+
31+
if opts.opid == "" {
32+
cid, err := sdk.FindCommandIDByName(ctx, pbm, opts.name)
33+
if err != nil {
34+
return nil, errors.Wrap(err, "find opid by name")
35+
}
36+
opts.opid = string(cid)
37+
}
38+
39+
report, err := sdk.Diagnostic(ctx, pbm, sdk.CommandID(opts.opid))
40+
if err != nil {
41+
return nil, err
42+
}
43+
44+
if fileInfo, err := os.Stat(opts.path); err != nil {
45+
if !os.IsNotExist(err) {
46+
return nil, errors.Wrap(err, "stat")
47+
}
48+
err = os.MkdirAll(opts.path, 0o777)
49+
if err != nil {
50+
return nil, errors.Wrap(err, "create path")
51+
}
52+
} else if !fileInfo.IsDir() {
53+
return nil, errors.Errorf("%s is not a dir", opts.path)
54+
}
55+
56+
err = writeToFile(opts.path, opts.opid+".report.json", report)
57+
if err != nil {
58+
return nil, errors.Wrapf(err,
59+
"failed to save %s", filepath.Join(opts.path, opts.opid+".report.json"))
60+
}
61+
62+
switch report.Command.Cmd {
63+
case sdk.CmdBackup:
64+
meta, err := pbm.GetBackupByOpID(ctx, opts.opid, sdk.GetBackupByNameOptions{})
65+
if err != nil {
66+
return nil, errors.Wrap(err, "get backup meta")
67+
}
68+
err = writeToFile(opts.path, opts.opid+".backup.json", meta)
69+
if err != nil {
70+
return nil, errors.Wrapf(err,
71+
"failed to save %s", filepath.Join(opts.path, opts.opid+".backup.json"))
72+
}
73+
case sdk.CmdRestore:
74+
meta, err := pbm.GetRestoreByOpID(ctx, opts.opid)
75+
if err != nil {
76+
return nil, errors.Wrap(err, "get restore meta")
77+
}
78+
err = writeToFile(opts.path, opts.opid+".restore.json", meta)
79+
if err != nil {
80+
return nil, errors.Wrapf(err,
81+
"failed to save %s", filepath.Join(opts.path, opts.opid+".restore.json"))
82+
}
83+
}
84+
85+
err = writeLogToFile(ctx, pbm, opts)
86+
if err != nil {
87+
return nil, errors.Wrap(err, "failed to save command log")
88+
}
89+
90+
return outMsg{""}, nil
91+
}
92+
93+
//nolint:nonamedreturns
94+
func writeLogToFile(ctx context.Context, pbm *sdk.Client, opts diagnosticOptions) (err error) {
95+
filename := filepath.Join(opts.path, opts.opid+".log")
96+
file, err := os.Create(filename)
97+
if err != nil {
98+
return err
99+
}
100+
defer func() {
101+
if err != nil {
102+
file.Close()
103+
os.Remove(filename)
104+
}
105+
}()
106+
107+
cur, err := sdk.CommandLogCursor(ctx, pbm, sdk.CommandID(opts.opid))
108+
if err != nil {
109+
return errors.Wrap(err, "open log cursor")
110+
}
111+
defer cur.Close(ctx)
112+
113+
eol := []byte("\n")
114+
for cur.Next(ctx) {
115+
rec, err := cur.Record()
116+
if err != nil {
117+
return errors.Wrap(err, "log: decode")
118+
}
119+
120+
data, err := bson.MarshalExtJSON(rec, true, true)
121+
if err != nil {
122+
return errors.Wrap(err, "log: encode")
123+
}
124+
125+
n, err := file.Write(data)
126+
if err != nil {
127+
return errors.Wrap(err, "log: write")
128+
}
129+
if n != len(data) {
130+
return errors.Wrap(io.ErrShortWrite, "log")
131+
}
132+
133+
n, err = file.Write(eol)
134+
if err != nil {
135+
return errors.Wrap(err, "log: write")
136+
}
137+
if n != len(eol) {
138+
return errors.Wrap(io.ErrShortWrite, "log")
139+
}
140+
}
141+
142+
err = cur.Err()
143+
if err != nil {
144+
return errors.Wrap(err, "log cursor")
145+
}
146+
147+
err = file.Close()
148+
if err != nil {
149+
return errors.Wrap(err, "failed to save file command.log")
150+
}
151+
152+
return nil
153+
}
154+
155+
func writeToFile(dirname, name string, val any) error {
156+
data, err := bson.MarshalExtJSONIndent(val, true, true, "", " ")
157+
if err != nil {
158+
return errors.Wrap(err, "marshal")
159+
}
160+
161+
file, err := os.Create(filepath.Join(dirname, name))
162+
if err != nil {
163+
return err
164+
}
165+
defer file.Close()
166+
167+
n, err := file.Write(data)
168+
if err != nil {
169+
return errors.Wrap(err, "write")
170+
}
171+
if n != len(data) {
172+
return io.ErrShortWrite
173+
}
174+
err = file.Close()
175+
if err != nil {
176+
return errors.Wrap(err, "close file")
177+
}
178+
179+
return nil
180+
}

cmd/pbm/main.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,16 @@ func main() {
440440
Short('c').
441441
StringVar(&describeRestoreOpts.cfg)
442442

443+
diagnosticCmd := pbmCmd.Command("diagnostic", "Create diagnostic report")
444+
diagnosticOpts := diagnosticOptions{}
445+
diagnosticCmd.Flag("path", "Path where files will be saved").
446+
Required().
447+
StringVar(&diagnosticOpts.path)
448+
diagnosticCmd.Flag("opid", "OPID/Command ID").
449+
StringVar(&diagnosticOpts.opid)
450+
diagnosticCmd.Flag("name", "Backup or Restore name").
451+
StringVar(&diagnosticOpts.name)
452+
443453
cmd, err := pbmCmd.DefaultEnvars().Parse(os.Args[1:])
444454
if err != nil {
445455
fmt.Fprintln(os.Stderr, "Error: parse command line parameters:", err)
@@ -549,6 +559,8 @@ func main() {
549559
out, err = status(ctx, conn, pbm, *mURL, statusOpts, pbmOutF == outJSONpretty)
550560
case describeRestoreCmd.FullCommand():
551561
out, err = describeRestore(ctx, conn, describeRestoreOpts, node)
562+
case diagnosticCmd.FullCommand():
563+
out, err = handleDiagnostic(ctx, pbm, diagnosticOpts)
552564
}
553565

554566
if err != nil {

pbm/log/history.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,73 @@ func LogGet(ctx context.Context, m connect.Client, r *LogRequest, limit int64) (
256256
func LogGetExactSeverity(ctx context.Context, m connect.Client, r *LogRequest, limit int64) (*Entries, error) {
257257
return fetch(ctx, m, r, limit, true)
258258
}
259+
260+
func GetFirstTSForOPID(ctx context.Context, conn connect.Client, opid string) (int64, error) {
261+
return getTSForOPIDImpl(ctx, conn, opid, 1)
262+
}
263+
264+
func GetLastTSForOPID(ctx context.Context, conn connect.Client, opid string) (int64, error) {
265+
return getTSForOPIDImpl(ctx, conn, opid, -1)
266+
}
267+
268+
func CommandLogCursor(
269+
ctx context.Context,
270+
conn connect.Client,
271+
opid string,
272+
) (*Cursor, error) {
273+
from, err := GetFirstTSForOPID(ctx, conn, opid)
274+
if err != nil {
275+
return nil, errors.Wrap(err, "get first opid ts")
276+
}
277+
till, err := GetLastTSForOPID(ctx, conn, opid)
278+
if err != nil {
279+
return nil, errors.Wrap(err, "get last opid ts")
280+
}
281+
282+
cur, err := conn.LogCollection().Find(ctx, bson.D{{"ts", bson.M{"$gte": from, "$lte": till}}})
283+
if err != nil {
284+
return nil, errors.Wrap(err, "log: create cursor")
285+
}
286+
287+
return &Cursor{cur: cur}, nil
288+
}
289+
290+
type Cursor struct {
291+
cur *mongo.Cursor
292+
}
293+
294+
func (c *Cursor) Close(ctx context.Context) error {
295+
return c.cur.Close(ctx)
296+
}
297+
298+
func (c *Cursor) Err() error {
299+
return c.cur.Err()
300+
}
301+
302+
func (c *Cursor) Next(ctx context.Context) bool {
303+
return c.cur.Next(ctx)
304+
}
305+
306+
func (c *Cursor) Record() (*Entry, error) {
307+
var e *Entry
308+
err := c.cur.Decode(&e)
309+
return e, err
310+
}
311+
312+
func getTSForOPIDImpl(
313+
ctx context.Context,
314+
conn connect.Client,
315+
opid string,
316+
sort int,
317+
) (int64, error) {
318+
raw, err := conn.LogCollection().FindOne(ctx,
319+
bson.D{{"opid", opid}},
320+
options.FindOne().SetSort(bson.D{{"ts", sort}}).SetProjection(bson.D{{"ts", 1}})).
321+
Raw()
322+
if err != nil {
323+
return 0, err
324+
}
325+
326+
ts, _ := raw.Lookup("ts").AsInt64OK()
327+
return ts, nil
328+
}

sdk/sdk.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ func NewClient(ctx context.Context, uri string) (*Client, error) {
146146
return &Client{conn: conn, node: inf.Me}, nil
147147
}
148148

149+
func CommandLogCursor(ctx context.Context, c *Client, cid CommandID) (*log.Cursor, error) {
150+
return log.CommandLogCursor(ctx, c.conn, string(cid))
151+
}
152+
149153
func WaitForAddProfile(ctx context.Context, client *Client, cid CommandID) error {
150154
lck := &lock.LockHeader{Type: ctrl.CmdAddConfigProfile, OPID: string(cid)}
151155
return waitOp(ctx, client.conn, lck)

sdk/util.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@ package sdk
33
import (
44
"context"
55

6+
"go.mongodb.org/mongo-driver/bson"
67
"go.mongodb.org/mongo-driver/bson/primitive"
8+
"go.mongodb.org/mongo-driver/mongo"
9+
"go.mongodb.org/mongo-driver/mongo/options"
710

811
"github.com/percona/percona-backup-mongodb/pbm/backup"
912
"github.com/percona/percona-backup-mongodb/pbm/ctrl"
1013
"github.com/percona/percona-backup-mongodb/pbm/defs"
1114
"github.com/percona/percona-backup-mongodb/pbm/errors"
15+
"github.com/percona/percona-backup-mongodb/pbm/lock"
1216
"github.com/percona/percona-backup-mongodb/pbm/log"
1317
"github.com/percona/percona-backup-mongodb/pbm/topo"
1418
)
@@ -87,3 +91,63 @@ func WaitForResync(ctx context.Context, c *Client, cid CommandID) error {
8791
}
8892
}
8993
}
94+
95+
func FindCommandIDByName(ctx context.Context, c *Client, name string) (CommandID, error) {
96+
res := c.conn.CmdStreamCollection().FindOne(ctx,
97+
bson.D{{"$or", bson.A{
98+
bson.M{"backup.name": name},
99+
bson.M{"restore.name": name},
100+
}}},
101+
options.FindOne().SetProjection(bson.D{{"_id", 1}}))
102+
raw, err := res.Raw()
103+
if err != nil {
104+
if errors.Is(err, mongo.ErrNoDocuments) {
105+
return NoOpID, ErrNotFound
106+
}
107+
return NoOpID, err
108+
}
109+
110+
return CommandID(ctrl.OPID(raw.Lookup("_id").ObjectID()).String()), nil
111+
}
112+
113+
type DiagnosticReport struct {
114+
ClusterTime primitive.Timestamp `json:"cluster_time" bson:"cluster_time"`
115+
Command *Command `json:"command" bson:"command"`
116+
Members []topo.Shard `json:"replsets" bson:"replsets"`
117+
Agents []AgentStatus `json:"agents" bson:"agents"`
118+
Locks []lock.LockData `json:"locks,omitempty" bson:"locks,omitempty"`
119+
OpLocks []lock.LockData `json:"op_locks,omitempty" bson:"op_locks,omitempty"`
120+
}
121+
122+
func Diagnostic(ctx context.Context, c *Client, cid CommandID) (*DiagnosticReport, error) {
123+
var err error
124+
rv := &DiagnosticReport{}
125+
126+
rv.ClusterTime, err = topo.GetClusterTime(ctx, c.conn)
127+
if err != nil {
128+
return nil, errors.Wrap(err, "get cluster time")
129+
}
130+
rv.Command, err = c.CommandInfo(ctx, cid)
131+
if err != nil {
132+
return nil, errors.Wrap(err, "get command info")
133+
}
134+
rv.Members, err = topo.ClusterMembers(ctx, c.conn.MongoClient())
135+
if err != nil {
136+
return nil, errors.Wrap(err, "get members")
137+
}
138+
rv.Agents, err = topo.ListAgents(ctx, c.conn)
139+
if err != nil {
140+
return nil, errors.Wrap(err, "get agents")
141+
}
142+
143+
rv.Locks, err = lock.GetLocks(ctx, c.conn, &lock.LockHeader{})
144+
if err != nil {
145+
return nil, errors.Wrap(err, "get locks")
146+
}
147+
rv.OpLocks, err = lock.GetOpLocks(ctx, c.conn, &lock.LockHeader{})
148+
if err != nil {
149+
return nil, errors.Wrap(err, "get op locks")
150+
}
151+
152+
return rv, nil
153+
}

0 commit comments

Comments
 (0)