@@ -1042,7 +1042,7 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem
1042
1042
log .Ops .Warningf (ctx , "problem fetching disk stats: %s; disk stats will be empty." , err )
1043
1043
} else {
1044
1044
rsr .last .disk = diskCounters
1045
- subtractDiskCounters (& diskCounters , rsr .initialDiskCounters )
1045
+ subtractDiskCounters (ctx , & diskCounters , & rsr .initialDiskCounters )
1046
1046
1047
1047
rsr .HostDiskReadBytes .Update (diskCounters .ReadBytes )
1048
1048
rsr .HostDiskReadCount .Update (diskCounters .readCount )
@@ -1063,11 +1063,11 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem
1063
1063
}
1064
1064
} else {
1065
1065
deltaNet = nc // delta since *last* scrape
1066
- subtractNetworkCounters (& deltaNet , rsr .last .net )
1066
+ subtractNetworkCounters (ctx , & deltaNet , & rsr .last .net )
1067
1067
rsr .last .net = nc
1068
1068
1069
1069
// `nc` will now be the delta since *first* scrape.
1070
- subtractNetworkCounters (& nc , rsr .initialNetCounters )
1070
+ subtractNetworkCounters (ctx , & nc , & rsr .initialNetCounters )
1071
1071
// TODO(tbg): this is awkward: we're computing the delta above,
1072
1072
// why don't we increment the counters?
1073
1073
rsr .HostNetRecvBytes .Update (int64 (nc .IOCounters .BytesRecv ))
@@ -1443,19 +1443,38 @@ func sumAndFilterDiskCounters(disksStats []DiskStats) (DiskStats, error) {
1443
1443
return output , nil
1444
1444
}
1445
1445
1446
- // subtractDiskCounters subtracts the counters in `sub` from the counters in `from`,
1447
- // saving the results in `from`.
1448
- func subtractDiskCounters (from * DiskStats , sub DiskStats ) {
1449
- from .writeCount -= sub .writeCount
1450
- from .WriteBytes -= sub .WriteBytes
1451
- from .writeTime -= sub .writeTime
1452
-
1453
- from .readCount -= sub .readCount
1454
- from .ReadBytes -= sub .ReadBytes
1455
- from .readTime -= sub .readTime
1456
-
1457
- from .ioTime -= sub .ioTime
1458
- from .weightedIOTime -= sub .weightedIOTime
1446
+ // subtractDiskCounters subtracts the counters in `baseline` from the
1447
+ // counters in `stats`, saving the results in `stats`. If any counter
1448
+ // in `stats` is lower than the corresponding counter in `baseline`
1449
+ // (indicating a reset), the value for all metrics in `baseline`
1450
+ // is updated to the current value in `stats` to establish a new
1451
+ // baseline.
1452
+ func subtractDiskCounters (ctx context.Context , stats * DiskStats , baseline * DiskStats ) {
1453
+ if stats .WriteBytes < baseline .WriteBytes ||
1454
+ stats .writeCount < baseline .writeCount ||
1455
+ stats .writeTime < baseline .writeTime ||
1456
+ stats .ReadBytes < baseline .ReadBytes ||
1457
+ stats .readCount < baseline .readCount ||
1458
+ stats .readTime < baseline .readTime ||
1459
+ stats .ioTime < baseline .ioTime ||
1460
+ stats .weightedIOTime < baseline .weightedIOTime {
1461
+ * baseline = * stats
1462
+ * stats = DiskStats {}
1463
+ log .Ops .Info (ctx , "runtime: new baseline in disk stats from host. disk metric counters have been reset." )
1464
+ return
1465
+ }
1466
+
1467
+ // Perform normal subtraction
1468
+ stats .writeCount -= baseline .writeCount
1469
+ stats .WriteBytes -= baseline .WriteBytes
1470
+ stats .writeTime -= baseline .writeTime
1471
+
1472
+ stats .readCount -= baseline .readCount
1473
+ stats .ReadBytes -= baseline .ReadBytes
1474
+ stats .readTime -= baseline .readTime
1475
+
1476
+ stats .ioTime -= baseline .ioTime
1477
+ stats .weightedIOTime -= baseline .weightedIOTime
1459
1478
}
1460
1479
1461
1480
// sumNetworkCounters returns a new net.IOCountersStat whose values are the sum of the
@@ -1475,22 +1494,46 @@ func sumNetworkCounters(netCounters []net.IOCountersStat) net.IOCountersStat {
1475
1494
return output
1476
1495
}
1477
1496
1478
- // subtractNetworkCounters subtracts the counters in `sub` from the counters in `from`,
1479
- // saving the results in `from`.
1480
- func subtractNetworkCounters (from * netCounters , sub netCounters ) {
1481
- from .IOCounters .BytesRecv -= sub .IOCounters .BytesRecv
1482
- from .IOCounters .PacketsRecv -= sub .IOCounters .PacketsRecv
1483
- from .IOCounters .Errin -= sub .IOCounters .Errin
1484
- from .IOCounters .Dropin -= sub .IOCounters .Dropin
1485
- from .IOCounters .BytesSent -= sub .IOCounters .BytesSent
1486
- from .IOCounters .PacketsSent -= sub .IOCounters .PacketsSent
1487
- from .IOCounters .Errout -= sub .IOCounters .Errout
1488
- from .IOCounters .Dropout -= sub .IOCounters .Dropout
1489
- from .TCPRetransSegs -= sub .TCPRetransSegs
1490
- from .TCPFastRetrans -= sub .TCPFastRetrans
1491
- from .TCPTimeouts -= sub .TCPTimeouts
1492
- from .TCPSlowStartRetrans -= sub .TCPSlowStartRetrans
1493
- from .TCPLossProbes -= sub .TCPLossProbes
1497
+ // subtractNetworkCounters subtracts the counters in `baseline`
1498
+ // from the counters in `stats`, saving the results in `stats`. If
1499
+ // any counter in `stats` is lower than the corresponding counter
1500
+ // in `baseline` (indicating a reset), the value for all metrics in
1501
+ // `baseline` is updated to the current value in `stats` to establish
1502
+ // a new baseline.
1503
+ func subtractNetworkCounters (ctx context.Context , stats * netCounters , baseline * netCounters ) {
1504
+ if stats .IOCounters .BytesRecv < baseline .IOCounters .BytesRecv ||
1505
+ stats .IOCounters .PacketsRecv < baseline .IOCounters .PacketsRecv ||
1506
+ stats .IOCounters .Errin < baseline .IOCounters .Errin ||
1507
+ stats .IOCounters .Dropin < baseline .IOCounters .Dropin ||
1508
+ stats .IOCounters .BytesSent < baseline .IOCounters .BytesSent ||
1509
+ stats .IOCounters .PacketsSent < baseline .IOCounters .PacketsSent ||
1510
+ stats .IOCounters .Errout < baseline .IOCounters .Errout ||
1511
+ stats .IOCounters .Dropout < baseline .IOCounters .Dropout ||
1512
+ stats .TCPRetransSegs < baseline .TCPRetransSegs ||
1513
+ stats .TCPFastRetrans < baseline .TCPFastRetrans ||
1514
+ stats .TCPTimeouts < baseline .TCPTimeouts ||
1515
+ stats .TCPSlowStartRetrans < baseline .TCPSlowStartRetrans ||
1516
+ stats .TCPLossProbes < baseline .TCPLossProbes {
1517
+ * baseline = * stats
1518
+ * stats = netCounters {}
1519
+ log .Ops .Info (ctx , "runtime: new baseline in network stats from host. network metric counters have been reset." )
1520
+ return
1521
+ }
1522
+
1523
+ // Perform normal subtraction
1524
+ stats .IOCounters .BytesRecv -= baseline .IOCounters .BytesRecv
1525
+ stats .IOCounters .PacketsRecv -= baseline .IOCounters .PacketsRecv
1526
+ stats .IOCounters .Errin -= baseline .IOCounters .Errin
1527
+ stats .IOCounters .Dropin -= baseline .IOCounters .Dropin
1528
+ stats .IOCounters .BytesSent -= baseline .IOCounters .BytesSent
1529
+ stats .IOCounters .PacketsSent -= baseline .IOCounters .PacketsSent
1530
+ stats .IOCounters .Errout -= baseline .IOCounters .Errout
1531
+ stats .IOCounters .Dropout -= baseline .IOCounters .Dropout
1532
+ stats .TCPRetransSegs -= baseline .TCPRetransSegs
1533
+ stats .TCPFastRetrans -= baseline .TCPFastRetrans
1534
+ stats .TCPTimeouts -= baseline .TCPTimeouts
1535
+ stats .TCPSlowStartRetrans -= baseline .TCPSlowStartRetrans
1536
+ stats .TCPLossProbes -= baseline .TCPLossProbes
1494
1537
}
1495
1538
1496
1539
// GetProcCPUTime returns the cumulative user/system time (in ms) since the process start.
0 commit comments