Skip to content

Commit 607f255

Browse files
authored
Add graph CDEF metric scaling and normalize metrics as int64s (#26)
2 parents 098e681 + 6fdcc14 commit 607f255

File tree

14 files changed

+140
-86
lines changed

14 files changed

+140
-86
lines changed

pkg/check/descriptor.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ type MetricDef struct {
1313

1414
// Unit is the unit of measurement for graphs and display (e.g. "ms").
1515
Unit string
16+
17+
// Scale is the divisor applied to convert the raw stored value to
18+
// the display unit. For example, ping stores microseconds but
19+
// displays milliseconds, so Scale is 1000.
20+
// A value of 0 or 1 means no scaling is applied.
21+
Scale int
1622
}
1723

1824
// Descriptor declares static metadata about a check type, including

pkg/check/descriptor_test.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ func TestDescriptor_ZeroValue(t *testing.T) {
1414
func TestDescriptor_WithMetrics(t *testing.T) {
1515
d := Descriptor{
1616
Metrics: []MetricDef{
17-
{ResultKey: "latency_us", DSName: "latency", Label: "latency", Unit: "ms"},
17+
{ResultKey: "latency_us", DSName: "latency", Label: "latency", Unit: "ms", Scale: 1000},
1818
{ResultKey: "rx_bytes", DSName: "rx", Label: "received", Unit: "bytes"},
1919
},
2020
}
@@ -24,7 +24,24 @@ func TestDescriptor_WithMetrics(t *testing.T) {
2424
if d.Metrics[0].ResultKey != "latency_us" {
2525
t.Errorf("expected first ResultKey 'latency_us', got %q", d.Metrics[0].ResultKey)
2626
}
27+
if d.Metrics[0].Scale != 1000 {
28+
t.Errorf("expected first Scale 1000, got %d", d.Metrics[0].Scale)
29+
}
2730
if d.Metrics[1].DSName != "rx" {
2831
t.Errorf("expected second DSName 'rx', got %q", d.Metrics[1].DSName)
2932
}
3033
}
34+
35+
func TestDescriptor_ScaleZeroMeansNoScaling(t *testing.T) {
36+
d := MetricDef{ResultKey: "rtt_ms", DSName: "rtt", Label: "rtt", Unit: "ms", Scale: 0}
37+
if d.Scale != 0 {
38+
t.Errorf("expected Scale 0, got %d", d.Scale)
39+
}
40+
}
41+
42+
func TestDescriptor_ScaleOneMeansNoScaling(t *testing.T) {
43+
d := MetricDef{ResultKey: "rtt_ms", DSName: "rtt", Label: "rtt", Unit: "ms", Scale: 1}
44+
if d.Scale != 1 {
45+
t.Errorf("expected Scale 1, got %d", d.Scale)
46+
}
47+
}

pkg/check/ping/ping.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ const (
3030
// Desc describes the metrics produced by a ping check.
3131
var Desc = check.Descriptor{
3232
Metrics: []check.MetricDef{
33-
{ResultKey: "latency_us", DSName: "latency", Label: "latency", Unit: "ms"},
33+
{ResultKey: "latency_us", DSName: "latency", Label: "latency", Unit: "ms", Scale: 1000},
3434
},
3535
}
3636

@@ -126,8 +126,8 @@ func (p *Ping) Run(ctx context.Context) check.Result {
126126
return check.Result{
127127
Timestamp: now,
128128
Success: true,
129-
Metrics: map[string]float64{
130-
"latency_us": float64(latency.Microseconds()),
129+
Metrics: map[string]int64{
130+
"latency_us": int64(latency.Microseconds()),
131131
},
132132
}
133133
}

pkg/check/ping/ping_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ func TestDesc(t *testing.T) {
267267
if m.Unit != "ms" {
268268
t.Errorf("expected Unit 'ms', got %q", m.Unit)
269269
}
270+
if m.Scale != 1000 {
271+
t.Errorf("expected Scale 1000, got %d", m.Scale)
272+
}
270273
}
271274

272275
func TestRegistryIntegration(t *testing.T) {
@@ -291,6 +294,9 @@ func TestRegistryIntegration(t *testing.T) {
291294
if len(desc.Metrics) != 1 || desc.Metrics[0].ResultKey != "latency_us" {
292295
t.Errorf("unexpected descriptor: %+v", desc)
293296
}
297+
if desc.Metrics[0].Scale != 1000 {
298+
t.Errorf("expected Scale 1000, got %d", desc.Metrics[0].Scale)
299+
}
294300
}
295301

296302
// TestRun_Localhost actually pings localhost. Requires ping binary on PATH.
@@ -317,7 +323,7 @@ func TestRun_Localhost(t *testing.T) {
317323
t.Fatal("expected latency_us metric")
318324
}
319325
if latency <= 0 {
320-
t.Errorf("expected positive latency, got %f", latency)
326+
t.Errorf("expected positive latency, got %d", latency)
321327
}
322328
}
323329

pkg/check/result.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type Result struct {
1515
// Metrics holds named measurements from the check execution.
1616
// For example, a ping check might set {"latency_us": 1234}.
1717
// An empty or nil map is valid for checks that only report success/failure.
18-
Metrics map[string]float64
18+
Metrics map[string]int64
1919

2020
// Err holds any error encountered during check execution.
2121
// A non-nil Err generally corresponds to Success being false,

pkg/check/result_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ func TestResult_WithMetrics(t *testing.T) {
2525
r := Result{
2626
Timestamp: time.Now(),
2727
Success: true,
28-
Metrics: map[string]float64{"latency_us": 1234.5},
28+
Metrics: map[string]int64{"latency_us": 12345},
2929
}
3030
if !r.Success {
3131
t.Error("expected success")
3232
}
33-
if v, ok := r.Metrics["latency_us"]; !ok || v != 1234.5 {
34-
t.Errorf("expected latency_us=1234.5, got %v", v)
33+
if v, ok := r.Metrics["latency_us"]; !ok || v != 12345 {
34+
t.Errorf("expected latency_us=12345, got %v", v)
3535
}
3636
}

pkg/check/status.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func (s *Status) Alive() bool {
2828
// Metric returns the value of a named metric from the last result.
2929
// Returns the value and true if found, or 0 and false if not present
3030
// or the last check failed.
31-
func (s *Status) Metric(key string) (float64, bool) {
31+
func (s *Status) Metric(key string) (int64, bool) {
3232
s.mu.RLock()
3333
defer s.mu.RUnlock()
3434
if !s.lastResult.Success || s.lastResult.Metrics == nil {
@@ -66,9 +66,9 @@ func (s *Status) Snapshot() StatusSnapshot {
6666
defer s.mu.RUnlock()
6767

6868
// Deep copy the metrics map so the snapshot is independent
69-
var metrics map[string]float64
69+
var metrics map[string]int64
7070
if s.lastResult.Metrics != nil {
71-
metrics = make(map[string]float64, len(s.lastResult.Metrics))
71+
metrics = make(map[string]int64, len(s.lastResult.Metrics))
7272
for k, v := range s.lastResult.Metrics {
7373
metrics[k] = v
7474
}
@@ -84,6 +84,6 @@ func (s *Status) Snapshot() StatusSnapshot {
8484
// StatusSnapshot is a point-in-time copy of Status fields.
8585
type StatusSnapshot struct {
8686
Alive bool
87-
Metrics map[string]float64
87+
Metrics map[string]int64
8888
LastUpdate int64
8989
}

pkg/check/status_test.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ func TestNewStatus_ZeroValues(t *testing.T) {
1111
t.Error("new status should not be alive")
1212
}
1313
if v, ok := s.Metric("latency_us"); ok {
14-
t.Errorf("new status should have no metrics, got latency_us=%f", v)
14+
t.Errorf("new status should have no metrics, got latency_us=%d", v)
1515
}
1616
if s.LastUpdate() != 0 {
1717
t.Errorf("new status should have zero last update, got %d", s.LastUpdate())
@@ -22,7 +22,7 @@ func TestStatus_SetResult_Success(t *testing.T) {
2222
s := NewStatus()
2323
s.SetResult(Result{
2424
Success: true,
25-
Metrics: map[string]float64{"latency_us": 1234.0},
25+
Metrics: map[string]int64{"latency_us": 1234},
2626
})
2727

2828
if !s.Alive() {
@@ -32,8 +32,8 @@ func TestStatus_SetResult_Success(t *testing.T) {
3232
if !ok {
3333
t.Fatal("expected latency_us metric to be present")
3434
}
35-
if v != 1234.0 {
36-
t.Errorf("expected latency_us=1234.0, got %f", v)
35+
if v != 1234 {
36+
t.Errorf("expected latency_us=1234.0, got %d", v)
3737
}
3838
}
3939

@@ -42,7 +42,7 @@ func TestStatus_SetResult_Failure(t *testing.T) {
4242
// First set it alive
4343
s.SetResult(Result{
4444
Success: true,
45-
Metrics: map[string]float64{"latency_us": 1000.0},
45+
Metrics: map[string]int64{"latency_us": 1000},
4646
})
4747
// Then fail
4848
s.SetResult(Result{
@@ -61,7 +61,7 @@ func TestStatus_SetResult_SuccessWithoutMetrics(t *testing.T) {
6161
s := NewStatus()
6262
s.SetResult(Result{
6363
Success: true,
64-
Metrics: map[string]float64{},
64+
Metrics: map[string]int64{},
6565
})
6666

6767
if !s.Alive() {
@@ -76,19 +76,19 @@ func TestStatus_Metric_MultipleMetrics(t *testing.T) {
7676
s := NewStatus()
7777
s.SetResult(Result{
7878
Success: true,
79-
Metrics: map[string]float64{
80-
"latency_us": 1234.0,
81-
"response_code": 200.0,
79+
Metrics: map[string]int64{
80+
"latency_us": 12340,
81+
"response_code": 2000,
8282
},
8383
})
8484

8585
v, ok := s.Metric("latency_us")
86-
if !ok || v != 1234.0 {
87-
t.Errorf("expected latency_us=1234.0, got %f (ok=%v)", v, ok)
86+
if !ok || v != 12340 {
87+
t.Errorf("expected latency_us=12340, got %d (ok=%v)", v, ok)
8888
}
8989
v, ok = s.Metric("response_code")
90-
if !ok || v != 200.0 {
91-
t.Errorf("expected response_code=200.0, got %f (ok=%v)", v, ok)
90+
if !ok || v != 2000 {
91+
t.Errorf("expected response_code=2000, got %d (ok=%v)", v, ok)
9292
}
9393
if _, ok := s.Metric("nonexistent"); ok {
9494
t.Error("expected nonexistent metric to not be found")
@@ -108,7 +108,7 @@ func TestStatus_Snapshot(t *testing.T) {
108108
s := NewStatus()
109109
s.SetResult(Result{
110110
Success: true,
111-
Metrics: map[string]float64{"latency_us": 5678.0},
111+
Metrics: map[string]int64{"latency_us": 5678},
112112
})
113113
s.SetLastUpdate(1700000000)
114114

@@ -117,8 +117,8 @@ func TestStatus_Snapshot(t *testing.T) {
117117
if !snap.Alive {
118118
t.Error("snapshot should be alive")
119119
}
120-
if v, ok := snap.Metrics["latency_us"]; !ok || v != 5678.0 {
121-
t.Errorf("snapshot metrics: expected latency_us=5678.0, got %v (ok=%v)", v, ok)
120+
if v, ok := snap.Metrics["latency_us"]; !ok || v != 5678 {
121+
t.Errorf("snapshot metrics: expected latency_us=5678, got %v (ok=%v)", v, ok)
122122
}
123123
if snap.LastUpdate != 1700000000 {
124124
t.Errorf("snapshot last update: expected 1700000000, got %d", snap.LastUpdate)
@@ -129,7 +129,7 @@ func TestStatus_Snapshot_Independent(t *testing.T) {
129129
s := NewStatus()
130130
s.SetResult(Result{
131131
Success: true,
132-
Metrics: map[string]float64{"latency_us": 1000.0},
132+
Metrics: map[string]int64{"latency_us": 1000},
133133
})
134134

135135
snap := s.Snapshot()
@@ -150,18 +150,18 @@ func TestStatus_Snapshot_MetricsMapIndependent(t *testing.T) {
150150
s := NewStatus()
151151
s.SetResult(Result{
152152
Success: true,
153-
Metrics: map[string]float64{"latency_us": 1000.0},
153+
Metrics: map[string]int64{"latency_us": 1000},
154154
})
155155

156156
snap := s.Snapshot()
157157

158158
// Mutate the snapshot's metrics map
159-
snap.Metrics["latency_us"] = 9999.0
159+
snap.Metrics["latency_us"] = 9999
160160

161161
// Status should be unaffected
162162
v, ok := s.Metric("latency_us")
163-
if !ok || v != 1000.0 {
164-
t.Errorf("mutating snapshot should not affect status, got %f", v)
163+
if !ok || v != 1000 {
164+
t.Errorf("mutating snapshot should not affect status, got %d", v)
165165
}
166166
}
167167

@@ -176,7 +176,7 @@ func TestStatus_ConcurrentAccess(t *testing.T) {
176176
defer wg.Done()
177177
s.SetResult(Result{
178178
Success: true,
179-
Metrics: map[string]float64{"latency_us": float64(n)},
179+
Metrics: map[string]int64{"latency_us": int64(n)},
180180
})
181181
s.SetLastUpdate(int64(n))
182182
}(i)

pkg/rrd/graph.go

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type graph struct {
1818
timeLength string // Time length for the graph (e.g., "4h" "1d")
1919
dsName string // RRD data source name (e.g., "latency")
2020
unit string // Unit of measurement (e.g., "ms")
21+
scale int // Divisor to convert raw value to display unit (0 or 1 = no scaling)
2122
consolidationFunction string // Consolidation function (e.g., "AVERAGE" "MAX")
2223
color string // Metric color (e.g., "#FF0001" (red))
2324
comment string // Comment at bottom of graph
@@ -36,12 +37,13 @@ type graph struct {
3637
// - dsName: The RRD data source name (e.g., "latency").
3738
// - label: The human-readable label for the metric (e.g., "latency").
3839
// - unit: The unit of measurement (e.g., "ms").
40+
// - scale: The divisor to convert the raw stored value to display unit (0 or 1 = no scaling).
3941
// - logger: The logger instance.
4042
//
4143
// Returns:
4244
// - *graph: A pointer to the newly created graph struct.
4345
// - error: An error if something went wrong during the initialization.
44-
func newGraph(host string, graphDir string, rrdPath string, timeLength string, consolidationFunction string, checkType string, dsName string, label string, unit string, logger *logrus.Logger) (*graph, error) {
46+
func newGraph(host string, graphDir string, rrdPath string, timeLength string, consolidationFunction string, checkType string, dsName string, label string, unit string, scale int, logger *logrus.Logger) (*graph, error) {
4547

4648
// Define directory and file paths
4749
dirPath := fmt.Sprintf("%s/imgs/%s", graphDir, host)
@@ -63,6 +65,7 @@ func newGraph(host string, graphDir string, rrdPath string, timeLength string, c
6365
timeLength: timeLength,
6466
dsName: dsName,
6567
unit: unit,
68+
scale: scale,
6669
consolidationFunction: consolidationFunction,
6770
color: GREEN,
6871
comment: comment,
@@ -78,6 +81,21 @@ func newGraph(host string, graphDir string, rrdPath string, timeLength string, c
7881
return graph, nil
7982
}
8083

84+
// displayVarName returns the RRD variable name used for display.
85+
// When scaling is applied, the variable includes the unit suffix (e.g., "latency_ms").
86+
// When no scaling is needed, it uses the raw variable directly (e.g., "latency_raw").
87+
func (g *graph) displayVarName() string {
88+
if g.needsScaling() {
89+
return fmt.Sprintf("%s_%s", g.dsName, g.unit)
90+
}
91+
return fmt.Sprintf("%s_raw", g.dsName)
92+
}
93+
94+
// needsScaling returns true if the raw value needs to be divided by a scale factor for display.
95+
func (g *graph) needsScaling() bool {
96+
return g.scale > 1
97+
}
98+
8199
// draw draws a graph based on the current parameters of the Graph struct.
82100
// It returns an error if the graph generation fails.
83101
func (g *graph) draw() error {
@@ -102,33 +120,37 @@ func (g *graph) draw() error {
102120
COMMENT:"\MAX latency over last 4h"
103121
*/
104122

105-
// Prepare the DEF and CDEF strings for each metric.
106-
defs := []string{}
107-
def := fmt.Sprintf("DEF:%s_raw=%s:%s:%s", g.dsName, g.rrdPath, g.dsName, g.consolidationFunction)
108-
defs = append(defs, def)
123+
displayVar := g.displayVarName()
109124

110-
cdefs := []string{}
111-
cdef := fmt.Sprintf("CDEF:%s_%s=%s_raw,1000,/", g.dsName, g.unit, g.dsName)
112-
cdefs = append(cdefs, cdef)
125+
// Prepare the DEF string for the raw data source.
126+
defs := []string{
127+
fmt.Sprintf("DEF:%s_raw=%s:%s:%s", g.dsName, g.rrdPath, g.dsName, g.consolidationFunction),
128+
}
129+
130+
// Prepare the CDEF string: emitted only when scaling is needed, dividing the raw value by the scale.
131+
var cdefs []string
132+
if g.needsScaling() {
133+
cdefs = append(cdefs, fmt.Sprintf("CDEF:%s=%s_raw,%d,/", displayVar, g.dsName, g.scale))
134+
}
135+
// When no scaling is needed, displayVar is already "dsName_raw" which is the DEF name,
136+
// so no CDEF is required.
113137

114138
lines := []string{
115-
fmt.Sprintf("AREA:%s_%s#%s:%s", g.dsName, g.unit, g.color, g.label),
139+
fmt.Sprintf("AREA:%s#%s:%s", displayVar, g.color, g.label),
116140
}
117141

118-
gprints := []string{}
119142
gfmt := "%.2lf"
120-
gprintsMinval := fmt.Sprintf("Min\\: %s %s", gfmt, g.unit)
121-
gprints = append(gprints, fmt.Sprintf("GPRINT:%s_%s:MIN:%s", g.dsName, g.unit, gprintsMinval))
122-
gprintsMaxval := fmt.Sprintf("Max\\: %s %s", gfmt, g.unit)
123-
gprints = append(gprints, fmt.Sprintf("GPRINT:%s_%s:MAX:%s", g.dsName, g.unit, gprintsMaxval))
124-
gprintsAverageval := fmt.Sprintf("Average\\: %s %s", gfmt, g.unit)
125-
gprints = append(gprints, fmt.Sprintf("GPRINT:%s_%s:AVERAGE:%s", g.dsName, g.unit, gprintsAverageval))
126-
gprintsLastval := fmt.Sprintf("Last\\: %s %s", gfmt, g.unit)
127-
gprints = append(gprints, fmt.Sprintf("GPRINT:%s_%s:LAST:%s", g.dsName, g.unit, gprintsLastval))
128-
129-
commentStrings := []string{}
130-
commentStrings = append(commentStrings, "COMMENT:\\n")
131-
commentStrings = append(commentStrings, fmt.Sprintf("COMMENT:%s", g.comment))
143+
gprints := []string{
144+
fmt.Sprintf("GPRINT:%s:MIN:Min\\: %s %s", displayVar, gfmt, g.unit),
145+
fmt.Sprintf("GPRINT:%s:MAX:Max\\: %s %s", displayVar, gfmt, g.unit),
146+
fmt.Sprintf("GPRINT:%s:AVERAGE:Average\\: %s %s", displayVar, gfmt, g.unit),
147+
fmt.Sprintf("GPRINT:%s:LAST:Last\\: %s %s", displayVar, gfmt, g.unit),
148+
}
149+
150+
commentStrings := []string{
151+
"COMMENT:\\n",
152+
fmt.Sprintf("COMMENT:%s", g.comment),
153+
}
132154

133155
// Prepare the command for generating the graph.
134156
args := []string{

0 commit comments

Comments
 (0)