Skip to content

Commit 6ccde36

Browse files
committed
prober: record total bytes transferred in DERP bandwidth probes
This will enable Prometheus queries to look at the bandwidth over time windows, for example 'increase(derp_bw_bytes_total[1h]) / increase(derp_bw_transfer_time_seconds_total[1h])'. Fixes commit a51672c. Updates tailscale/corp#25503 Signed-off-by: Percy Wegmann <[email protected]>
1 parent 377127c commit 6ccde36

File tree

1 file changed

+14
-8
lines changed

1 file changed

+14
-8
lines changed

prober/derp.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,25 +301,30 @@ func (d *derpProber) probeBandwidth(from, to string, size int64) ProbeClass {
301301
derpPath = "single"
302302
}
303303
var transferTimeSeconds expvar.Float
304+
var totalBytesTransferred expvar.Float
304305
return ProbeClass{
305306
Probe: func(ctx context.Context) error {
306307
fromN, toN, err := d.getNodePair(from, to)
307308
if err != nil {
308309
return err
309310
}
310-
return derpProbeBandwidth(ctx, d.lastDERPMap, fromN, toN, size, &transferTimeSeconds, d.bwTUNIPv4Prefix)
311+
return derpProbeBandwidth(ctx, d.lastDERPMap, fromN, toN, size, &transferTimeSeconds, &totalBytesTransferred, d.bwTUNIPv4Prefix)
311312
},
312313
Class: "derp_bw",
313314
Labels: Labels{
314315
"derp_path": derpPath,
315316
"tcp_in_tcp": strconv.FormatBool(d.bwTUNIPv4Prefix != nil),
316317
},
317318
Metrics: func(l prometheus.Labels) []prometheus.Metric {
318-
return []prometheus.Metric{
319+
metrics := []prometheus.Metric{
319320
prometheus.MustNewConstMetric(prometheus.NewDesc("derp_bw_probe_size_bytes", "Payload size of the bandwidth prober", nil, l), prometheus.GaugeValue, float64(size)),
320321
prometheus.MustNewConstMetric(prometheus.NewDesc("derp_bw_transfer_time_seconds_total", "Time it took to transfer data", nil, l), prometheus.CounterValue, transferTimeSeconds.Value()),
321-
prometheus.MustNewConstMetric(prometheus.NewDesc("derp_bw_bytes_total", "Amount of data transferred", nil, l), prometheus.CounterValue, float64(size)),
322322
}
323+
if d.bwTUNIPv4Prefix != nil {
324+
// For TCP-in-TCP probes, also record cumulative bytes transferred.
325+
metrics = append(metrics, prometheus.MustNewConstMetric(prometheus.NewDesc("derp_bw_bytes_total", "Amount of data transferred", nil, l), prometheus.CounterValue, totalBytesTransferred.Value()))
326+
}
327+
return metrics
323328
},
324329
}
325330
}
@@ -655,7 +660,7 @@ func derpProbeUDP(ctx context.Context, ipStr string, port int) error {
655660
// DERP clients connected to two DERP servers. If tunIPv4Prefix is specified,
656661
// probes will use a TCP connection over a TUN device at this address in order
657662
// to exercise TCP-in-TCP in similar fashion to TCP over Tailscale via DERP.
658-
func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode, size int64, transferTimeSeconds *expvar.Float, tunIPv4Prefix *netip.Prefix) (err error) {
663+
func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode, size int64, transferTimeSeconds, totalBytesTransferred *expvar.Float, tunIPv4Prefix *netip.Prefix) (err error) {
659664
// This probe uses clients with isProber=false to avoid spamming the derper logs with every packet
660665
// sent by the bandwidth probe.
661666
fromc, err := newConn(ctx, dm, from, false)
@@ -677,7 +682,7 @@ func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tail
677682
}
678683

679684
if tunIPv4Prefix != nil {
680-
err = derpProbeBandwidthTUN(ctx, transferTimeSeconds, from, to, fromc, toc, size, tunIPv4Prefix)
685+
err = derpProbeBandwidthTUN(ctx, transferTimeSeconds, totalBytesTransferred, from, to, fromc, toc, size, tunIPv4Prefix)
681686
} else {
682687
err = derpProbeBandwidthDirect(ctx, transferTimeSeconds, from, to, fromc, toc, size)
683688
}
@@ -848,7 +853,7 @@ var derpProbeBandwidthTUNMu sync.Mutex
848853
// to another over a TUN device at an address at the start of the usable host IP
849854
// range that the given tunAddress lives in. The time taken to finish the transfer
850855
// is recorded in `transferTimeSeconds` and the bytes read are added to `totalBytesTransferred`.
851-
func derpProbeBandwidthTUN(ctx context.Context, transferTimeSeconds *expvar.Float, from, to *tailcfg.DERPNode, fromc, toc *derphttp.Client, size int64, prefix *netip.Prefix) error {
856+
func derpProbeBandwidthTUN(ctx context.Context, transferTimeSeconds, totalBytesTransferred *expvar.Float, from, to *tailcfg.DERPNode, fromc, toc *derphttp.Client, size int64, prefix *netip.Prefix) error {
852857
// Make sure all goroutines have finished.
853858
var wg sync.WaitGroup
854859
defer wg.Wait()
@@ -1046,9 +1051,10 @@ func derpProbeBandwidthTUN(ctx context.Context, transferTimeSeconds *expvar.Floa
10461051
readFinishedC <- fmt.Errorf("unable to set read deadline: %w", err)
10471052
}
10481053
}
1049-
_, err = io.CopyN(io.Discard, readConn, size)
1050-
// Measure transfer time irrespective of whether it succeeded or failed.
1054+
n, err := io.CopyN(io.Discard, readConn, size)
1055+
// Measure transfer time and bytes transferred irrespective of whether it succeeded or failed.
10511056
transferTimeSeconds.Add(time.Since(start).Seconds())
1057+
totalBytesTransferred.Add(float64(n))
10521058
readFinishedC <- err
10531059
}()
10541060

0 commit comments

Comments
 (0)