Skip to content

Commit 572870c

Browse files
PMM-9652 Fixed anti-pattern metric names (#450)
* PMM-9652 WIP initial test * PMM-9652 Updated top exporter Now metric names are properly built and the namespace is in a label. * PMM-9652 Fixed anti-pattern metric names Metric names shouldn't vahe variable data like database or collection name as part of the name. The namespace and other information like the index nane should be set in a label. This patch fixes the metric names in collstas, indexstats and topstats collectors. * Ran format * PMM-9652 Updated labels to have database+collection * Updated labels for top collector * Removed old comment * Format
1 parent 0894859 commit 572870c

10 files changed

+194
-58
lines changed

exporter/collstats_collector.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,7 @@ func (d *collstatsCollector) collect(ch chan<- prometheus.Metric) {
120120
logger.Debugf("$collStats metrics for %s.%s", database, collection)
121121
debugResult(logger, stats)
122122

123-
// Since all collections will have the same fields, we need to use a metric prefix (db+col)
124-
// to differentiate metrics between collection. Labels are being set only to matke it easier
125-
// to filter
126-
prefix := database + "." + collection
127-
123+
prefix := "collstats"
128124
labels := d.topologyInfo.baseLabels()
129125
labels["database"] = database
130126
labels["collection"] = collection

exporter/collstats_collector_test.go

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,24 +58,31 @@ func TestCollStatsCollector(t *testing.T) {
5858

5959
// The last \n at the end of this string is important
6060
expected := strings.NewReader(`
61-
# HELP mongodb_testdb_testcol_00_latencyStats_commands_latency testdb.testcol_00.latencyStats.commands.
62-
# TYPE mongodb_testdb_testcol_00_latencyStats_commands_latency untyped
63-
mongodb_testdb_testcol_00_latencyStats_commands_latency{collection="testcol_00",database="testdb"} 0
64-
# HELP mongodb_testdb_testcol_01_latencyStats_commands_latency testdb.testcol_01.latencyStats.commands.
65-
# TYPE mongodb_testdb_testcol_01_latencyStats_commands_latency untyped
66-
mongodb_testdb_testcol_01_latencyStats_commands_latency{collection="testcol_01",database="testdb"} 0
67-
# HELP mongodb_testdb_testcol_02_latencyStats_commands_latency testdb.testcol_02.latencyStats.commands.
68-
# TYPE mongodb_testdb_testcol_02_latencyStats_commands_latency untyped
69-
mongodb_testdb_testcol_02_latencyStats_commands_latency{collection="testcol_02",database="testdb"} 0` + "\n")
61+
# HELP mongodb_collstats_latencyStats_commands_latency collstats.latencyStats.commands.
62+
# TYPE mongodb_collstats_latencyStats_commands_latency untyped
63+
mongodb_collstats_latencyStats_commands_latency{collection="testcol_00",database="testdb"} 0
64+
mongodb_collstats_latencyStats_commands_latency{collection="testcol_01",database="testdb"} 0
65+
mongodb_collstats_latencyStats_commands_latency{collection="testcol_02",database="testdb"} 0
66+
# HELP mongodb_collstats_latencyStats_transactions_ops collstats.latencyStats.transactions.
67+
# TYPE mongodb_collstats_latencyStats_transactions_ops untyped
68+
mongodb_collstats_latencyStats_transactions_ops{collection="testcol_00",database="testdb"} 0
69+
mongodb_collstats_latencyStats_transactions_ops{collection="testcol_01",database="testdb"} 0
70+
mongodb_collstats_latencyStats_transactions_ops{collection="testcol_02",database="testdb"} 0
71+
# HELP mongodb_collstats_storageStats_capped collstats.storageStats.
72+
# TYPE mongodb_collstats_storageStats_capped untyped
73+
mongodb_collstats_storageStats_capped{collection="testcol_00",database="testdb"} 0
74+
mongodb_collstats_storageStats_capped{collection="testcol_01",database="testdb"} 0
75+
mongodb_collstats_storageStats_capped{collection="testcol_02",database="testdb"} 0` +
76+
"\n")
7077

7178
// Filter metrics for 2 reasons:
7279
// 1. The result is huge
7380
// 2. We need to check against know values. Don't use metrics that return counters like uptime
7481
// or counters like the number of transactions because they won't return a known value to compare
7582
filter := []string{
76-
"mongodb_testdb_testcol_00_latencyStats_commands_latency",
77-
"mongodb_testdb_testcol_01_latencyStats_commands_latency",
78-
"mongodb_testdb_testcol_02_latencyStats_commands_latency",
83+
"mongodb_collstats_latencyStats_commands_latency",
84+
"mongodb_collstats_storageStats_capped",
85+
"mongodb_collstats_latencyStats_transactions_ops",
7986
}
8087
err := testutil.CollectAndCompare(c, expected, filter...)
8188
assert.NoError(t, err)

exporter/common.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,12 @@ func nonSystemCollectionsCount(ctx context.Context, client *mongo.Client, includ
217217

218218
return count, nil
219219
}
220+
221+
func splitNamespace(ns string) (database, collection string) {
222+
parts := strings.Split(ns, ".")
223+
if len(parts) < 2 { // there is no collection?
224+
return parts[0], ""
225+
}
226+
227+
return parts[0], strings.Join(parts[1:], ".")
228+
}

exporter/common_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,28 @@ func TestListCollections(t *testing.T) {
148148
assert.Equal(t, 6, count)
149149
})
150150
}
151+
152+
func TestSplitNamespace(t *testing.T) {
153+
testCases := []struct {
154+
namespace string
155+
wantDatabase string
156+
wantCollection string
157+
}{
158+
{
159+
namespace: "db.collection",
160+
wantDatabase: "db",
161+
wantCollection: "collection",
162+
},
163+
{
164+
namespace: "db.this.is.a.valid.collection.name",
165+
wantDatabase: "db",
166+
wantCollection: "this.is.a.valid.collection.name",
167+
},
168+
}
169+
170+
for _, tc := range testCases {
171+
db, coll := splitNamespace(tc.namespace)
172+
assert.Equal(t, tc.wantDatabase, db)
173+
assert.Equal(t, tc.wantCollection, coll)
174+
}
175+
}

exporter/indexstats_collector.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,17 @@ func (d *indexstatsCollector) collect(ch chan<- prometheus.Metric) {
101101
continue
102102
}
103103

104-
logger.Debugf("indexStats for %s.%s", database, collection)
105-
debugResult(logger, stats)
104+
d.base.logger.Debugf("indexStats for %s.%s", database, collection)
105+
106+
debugResult(d.base.logger, stats)
106107

107108
for _, metric := range stats {
108109
// prefix and labels are needed to avoid duplicated metric names since the metrics are the
109110
// same, for different collections.
110-
prefix := fmt.Sprintf("%s_%s_%s", database, collection, metric["name"])
111+
prefix := "indexstats"
111112
labels := d.topologyInfo.baseLabels()
112-
labels["namespace"] = database + "." + collection
113+
labels["database"] = database
114+
labels["collection"] = collection
113115
labels["key_name"] = fmt.Sprintf("%s", metric["name"])
114116

115117
metrics := sanitizeMetrics(metric)

exporter/indexstats_collector_test.go

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,24 +69,15 @@ func TestIndexStatsCollector(t *testing.T) {
6969

7070
// The last \n at the end of this string is important
7171
expected := strings.NewReader(`
72-
# HELP mongodb_testdb_testcol_00_id_accesses_ops testdb_testcol_00__id_.accesses.
73-
# TYPE mongodb_testdb_testcol_00_id_accesses_ops untyped
74-
mongodb_testdb_testcol_00_id_accesses_ops{key_name="_id_",namespace="testdb.testcol_00"} 0
75-
# HELP mongodb_testdb_testcol_00_idx_01_accesses_ops testdb_testcol_00_idx_01.accesses.
76-
# TYPE mongodb_testdb_testcol_00_idx_01_accesses_ops untyped
77-
mongodb_testdb_testcol_00_idx_01_accesses_ops{key_name="idx_01",namespace="testdb.testcol_00"} 0
78-
# HELP mongodb_testdb_testcol_01_id_accesses_ops testdb_testcol_01__id_.accesses.
79-
# TYPE mongodb_testdb_testcol_01_id_accesses_ops untyped
80-
mongodb_testdb_testcol_01_id_accesses_ops{key_name="_id_",namespace="testdb.testcol_01"} 0
81-
# HELP mongodb_testdb_testcol_01_idx_01_accesses_ops testdb_testcol_01_idx_01.accesses.
82-
# TYPE mongodb_testdb_testcol_01_idx_01_accesses_ops untyped
83-
mongodb_testdb_testcol_01_idx_01_accesses_ops{key_name="idx_01",namespace="testdb.testcol_01"} 0
84-
# HELP mongodb_testdb_testcol_02_id_accesses_ops testdb_testcol_02__id_.accesses.
85-
# TYPE mongodb_testdb_testcol_02_id_accesses_ops untyped
86-
mongodb_testdb_testcol_02_id_accesses_ops{key_name="_id_",namespace="testdb.testcol_02"} 0
87-
# HELP mongodb_testdb_testcol_02_idx_01_accesses_ops testdb_testcol_02_idx_01.accesses.
88-
# TYPE mongodb_testdb_testcol_02_idx_01_accesses_ops untyped
89-
mongodb_testdb_testcol_02_idx_01_accesses_ops{key_name="idx_01",namespace="testdb.testcol_02"} 0` + "\n")
72+
# HELP mongodb_indexstats_accesses_ops indexstats.accesses.
73+
# TYPE mongodb_indexstats_accesses_ops untyped
74+
mongodb_indexstats_accesses_ops{collection="testcol_00",database="testdb",key_name="_id_"} 0
75+
mongodb_indexstats_accesses_ops{collection="testcol_00",database="testdb",key_name="idx_01"} 0
76+
mongodb_indexstats_accesses_ops{collection="testcol_01",database="testdb",key_name="_id_"} 0
77+
mongodb_indexstats_accesses_ops{collection="testcol_01",database="testdb",key_name="idx_01"} 0
78+
mongodb_indexstats_accesses_ops{collection="testcol_02",database="testdb",key_name="_id_"} 0
79+
mongodb_indexstats_accesses_ops{collection="testcol_02",database="testdb",key_name="idx_01"} 0` +
80+
"\n")
9081

9182
err := testutil.CollectAndCompare(c, expected)
9283
assert.NoError(t, err)

exporter/metrics.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,16 @@ func makeRawMetric(prefix, name string, value interface{}, labels map[string]str
161161

162162
fqName, label := nameAndLabel(prefix, name)
163163

164+
metricType := prometheus.UntypedValue
165+
if strings.HasSuffix(strings.ToLower(name), "count") {
166+
metricType = prometheus.CounterValue
167+
}
168+
164169
rm := &rawMetric{
165170
fqName: fqName,
166171
help: help,
167172
val: *f,
168-
vt: prometheus.UntypedValue,
173+
vt: metricType,
169174
ln: make([]string, 0, len(labels)),
170175
lv: make([]string, 0, len(labels)),
171176
}

exporter/metrics_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ func TestMakeRawMetric(t *testing.T) {
163163
ln: ln,
164164
lv: lv,
165165
val: *tc.wantVal,
166-
vt: 3,
166+
vt: prometheus.CounterValue,
167167
}
168168
}
169169

exporter/top_collector.go

Lines changed: 90 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ package exporter
1818

1919
import (
2020
"context"
21+
"fmt"
2122

2223
"github.com/prometheus/client_golang/prometheus"
2324
"github.com/sirupsen/logrus"
2425
"go.mongodb.org/mongo-driver/bson"
26+
"go.mongodb.org/mongo-driver/bson/primitive"
2527
"go.mongodb.org/mongo-driver/mongo"
2628
)
2729

@@ -33,12 +35,14 @@ type topCollector struct {
3335
topologyInfo labelsGetter
3436
}
3537

36-
// newTopCollector creates a collector for statistics on collection usage.
37-
func newTopCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *topCollector {
38-
return &topCollector{
39-
ctx: ctx,
40-
base: newBaseCollector(client, logger),
38+
var ErrInvalidOrMissingTotalsEntry = fmt.Errorf("Invalid or misssing totals entry in top results")
4139

40+
func newTopCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool,
41+
topology labelsGetter,
42+
) *topCollector {
43+
return &topCollector{
44+
ctx: ctx,
45+
base: newBaseCollector(client, logger),
4246
compatibleMode: compatible,
4347
topologyInfo: topology,
4448
}
@@ -59,7 +63,7 @@ func (d *topCollector) collect(ch chan<- prometheus.Metric) {
5963
cmd := bson.D{{Key: "top", Value: "1"}}
6064
res := client.Database("admin").RunCommand(d.ctx, cmd)
6165

62-
var m bson.M
66+
var m primitive.M
6367
if err := res.Decode(&m); err != nil {
6468
ch <- prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err)
6569
return
@@ -68,7 +72,85 @@ func (d *topCollector) collect(ch chan<- prometheus.Metric) {
6872
logrus.Debug("top result:")
6973
debugResult(logger, m)
7074

71-
for _, metric := range makeMetrics("top", m, d.topologyInfo.baseLabels(), d.compatibleMode) {
72-
ch <- metric
75+
totals, ok := m["totals"].(primitive.M)
76+
if !ok {
77+
ch <- prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(ErrInvalidOrMissingTotalsEntry),
78+
ErrInvalidOrMissingTotalsEntry)
79+
}
80+
81+
/*
82+
The top command will return a structure with a key named totals and it is a map
83+
where the key is the collection namespace and for each collection there are per
84+
collection usage statistics.
85+
Example: rs1:SECONDARY> db.adminCommand({"top": 1});
86+
87+
{
88+
"totals" : {
89+
"note" : "all times in microseconds",
90+
"admin.system.roles" : {
91+
"total" : {
92+
"time" : 41,
93+
"count" : 1
94+
},
95+
"readLock" : {
96+
"time" : 41,
97+
"count" : 1
98+
},
99+
"writeLock" : {
100+
"time" : 0,
101+
"count" : 0
102+
},
103+
"queries" : {
104+
"time" : 41,
105+
"count" : 1
106+
},
107+
"getmore" : {
108+
"time" : 0,
109+
"count" : 0
110+
},
111+
"insert" : {
112+
"time" : 0,
113+
"count" : 0
114+
},
115+
"update" : {
116+
"time" : 0,
117+
"count" : 0
118+
},
119+
"remove" : {
120+
"time" : 0,
121+
"count" : 0
122+
},
123+
"commands" : {
124+
"time" : 0,
125+
"count" : 0
126+
}
127+
},
128+
"admin.system.version" : {
129+
"total" : {
130+
"time" : 63541,
131+
"count" : 218
132+
},
133+
134+
If we pass this structure to the makeMetrics function, we will have metric names with the form of
135+
prefix + namespace + metric like mongodb_top_totals_admin.system.role_readlock_count.
136+
Having the namespace as part of the metric is a Prometheus anti pattern and diffucults grouping
137+
metrics in Grafana. For this reason, we need to manually loop through the metric in the totals key
138+
and pass the namespace as a label to the makeMetrics function.
139+
*/
140+
141+
for namespace, metrics := range totals {
142+
labels := d.topologyInfo.baseLabels()
143+
db, coll := splitNamespace(namespace)
144+
labels["database"] = db
145+
labels["collection"] = coll
146+
147+
mm, ok := metrics.(primitive.M) // ingore entries like -> "note" : "all times in microseconds"
148+
if !ok {
149+
continue
150+
}
151+
152+
for _, metric := range makeMetrics("top", mm, labels, d.compatibleMode) {
153+
ch <- metric
154+
}
73155
}
74156
}

exporter/top_collector_test.go

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package exporter
1818

1919
import (
2020
"context"
21-
"strings"
2221
"testing"
2322
"time"
2423

@@ -39,18 +38,38 @@ func TestTopCollector(t *testing.T) {
3938

4039
c := newTopCollector(ctx, client, logrus.New(), false, ti)
4140

42-
// The last \n at the end of this string is important
43-
expected := strings.NewReader(`
44-
# HELP mongodb_top_ok top.
45-
# TYPE mongodb_top_ok untyped
46-
mongodb_top_ok 1` + "\n")
4741
// Filter metrics for 2 reasons:
4842
// 1. The result is huge
4943
// 2. We need to check against know values. Don't use metrics that return counters like uptime
5044
// or counters like the number of transactions because they won't return a known value to compare
5145
filter := []string{
52-
"mongodb_top_ok",
46+
"mongodb_top_update_count",
5347
}
54-
err := testutil.CollectAndCompare(c, expected, filter...)
55-
assert.NoError(t, err)
48+
filter = nil
49+
count := testutil.CollectAndCount(c, filter...)
50+
51+
/*
52+
The number of metrics is not a constant. It depends on the number of collections in the db.
53+
It looks like:
54+
55+
# HELP mongodb_top_update_count top.update.
56+
# TYPE mongodb_top_update_count untyped
57+
mongodb_top_update_count{namespace="admin.system.roles"} 0
58+
mongodb_top_update_count{namespace="admin.system.version"} 3
59+
mongodb_top_update_count{namespace="config.cache.chunks.config.system.sessions"} 0
60+
mongodb_top_update_count{namespace="config.cache.collections"} 1540
61+
mongodb_top_update_count{namespace="config.image_collection"} 0
62+
mongodb_top_update_count{namespace="config.system.sessions"} 12
63+
mongodb_top_update_count{namespace="config.transaction_coordinators"} 0
64+
mongodb_top_update_count{namespace="config.transactions"} 0
65+
mongodb_top_update_count{namespace="local.oplog.rs"} 0
66+
mongodb_top_update_count{namespace="local.replset.election"} 0
67+
mongodb_top_update_count{namespace="local.replset.minvalid"} 0
68+
mongodb_top_update_count{namespace="local.replset.oplogTruncateAfterPoint"} 0
69+
mongodb_top_update_count{namespace="local.startup_log"} 0
70+
mongodb_top_update_count{namespace="local.system.replset"} 0
71+
mongodb_top_update_count{namespace="local.system.rollback.id"} 0
72+
73+
*/
74+
assert.True(t, count > 0)
5675
}

0 commit comments

Comments
 (0)