Skip to content

Commit d5d63aa

Browse files
112274 converted cpu stats to support unsigned 64 bit number (#114681) (#115915)
(cherry picked from commit 78ccd2a) Co-authored-by: Souradip Poddar <[email protected]>
1 parent e620c67 commit d5d63aa

File tree

8 files changed, with 118 additions (+118) and 59 deletions (-59).

docs/changelog/114681.yaml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
pr: 114681
2+
summary: "Support for unsigned 64 bit numbers in Cpu stats"
3+
area: Infra/Core
4+
type: enhancement
5+
issues:
6+
- 112274

server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1234,4 +1234,11 @@ public <T extends Writeable> void writeMissingWriteable(Class<T> ignored) throws
12341234
public void writeMissingString() throws IOException {
12351235
writeBoolean(false);
12361236
}
1237+
1238+
/**
1239+
* Write a {@link BigInteger} to the stream
1240+
*/
1241+
public void writeBigInteger(BigInteger bigInteger) throws IOException {
1242+
writeString(bigInteger.toString());
1243+
}
12371244
}

server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java

Lines changed: 27 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import java.lang.management.OperatingSystemMXBean;
2323
import java.lang.reflect.InvocationTargetException;
2424
import java.lang.reflect.Method;
25+
import java.math.BigInteger;
2526
import java.nio.file.Files;
2627
import java.nio.file.Path;
2728
import java.util.Collections;
@@ -341,8 +342,8 @@ List<String> readProcSelfCgroup() throws IOException {
341342
* @return the total CPU time in nanoseconds
342343
* @throws IOException if an I/O exception occurs reading {@code cpuacct.usage} for the control group
343344
*/
344-
private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException {
345-
return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup));
345+
private BigInteger getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException {
346+
return new BigInteger(readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup));
346347
}
347348

348349
/**
@@ -435,21 +436,22 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws I
435436
* @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
436437
*/
437438
private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup) throws IOException {
439+
final var SENTINEL_VALUE = BigInteger.valueOf(-1);
438440
final List<String> lines = readSysFsCgroupCpuAcctCpuStat(controlGroup);
439-
long numberOfPeriods = -1;
440-
long numberOfTimesThrottled = -1;
441-
long timeThrottledNanos = -1;
441+
var numberOfPeriods = SENTINEL_VALUE;
442+
var numberOfTimesThrottled = SENTINEL_VALUE;
443+
var timeThrottledNanos = SENTINEL_VALUE;
442444
for (final String line : lines) {
443445
final String[] fields = line.split("\\s+");
444446
switch (fields[0]) {
445-
case "nr_periods" -> numberOfPeriods = Long.parseLong(fields[1]);
446-
case "nr_throttled" -> numberOfTimesThrottled = Long.parseLong(fields[1]);
447-
case "throttled_time" -> timeThrottledNanos = Long.parseLong(fields[1]);
447+
case "nr_periods" -> numberOfPeriods = new BigInteger(fields[1]);
448+
case "nr_throttled" -> numberOfTimesThrottled = new BigInteger(fields[1]);
449+
case "throttled_time" -> timeThrottledNanos = new BigInteger(fields[1]);
448450
}
449451
}
450-
assert numberOfPeriods != -1;
451-
assert numberOfTimesThrottled != -1;
452-
assert timeThrottledNanos != -1;
452+
assert numberOfPeriods.equals(SENTINEL_VALUE) == false;
453+
assert numberOfTimesThrottled.equals(SENTINEL_VALUE) == false;
454+
assert timeThrottledNanos.equals(SENTINEL_VALUE) == false;
453455
return new OsStats.Cgroup.CpuStat(numberOfPeriods, numberOfTimesThrottled, timeThrottledNanos);
454456
}
455457

@@ -635,28 +637,30 @@ boolean areCgroupStatsAvailable() throws IOException {
635637
* @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
636638
*/
637639
@SuppressForbidden(reason = "Uses PathUtils.get to generate meaningful assertion messages")
638-
private Map<String, Long> getCgroupV2CpuStats(String controlGroup) throws IOException {
640+
private Map<String, BigInteger> getCgroupV2CpuStats(String controlGroup) throws IOException {
639641
final List<String> lines = readCgroupV2CpuStats(controlGroup);
640-
final Map<String, Long> stats = new HashMap<>();
642+
final Map<String, BigInteger> stats = new HashMap<>();
643+
final BigInteger SENTINEL_VALUE = BigInteger.valueOf(-1);
641644

642645
for (String line : lines) {
643646
String[] parts = line.split("\\s+");
644647
assert parts.length == 2 : "Corrupt cpu.stat line: [" + line + "]";
645-
stats.put(parts[0], Long.parseLong(parts[1]));
648+
stats.put(parts[0], new BigInteger(parts[1]));
646649
}
647650

648651
final List<String> expectedKeys = List.of("system_usec", "usage_usec", "user_usec");
649652
expectedKeys.forEach(key -> {
650653
assert stats.containsKey(key) : "[" + key + "] missing from " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat");
651-
assert stats.get(key) != -1 : stats.get(key);
654+
assert stats.get(key).compareTo(SENTINEL_VALUE) != 0 : stats.get(key).toString();
652655
});
653656

654657
final List<String> optionalKeys = List.of("nr_periods", "nr_throttled", "throttled_usec");
655658
optionalKeys.forEach(key -> {
656659
if (stats.containsKey(key) == false) {
657-
stats.put(key, 0L);
660+
stats.put(key, BigInteger.ZERO);
658661
}
659-
assert stats.get(key) != -1L : "[" + key + "] in " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat") + " is -1";
662+
assert stats.get(key).compareTo(SENTINEL_VALUE) != 0
663+
: "[" + key + "] in " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat") + " is -1";
660664
});
661665

662666
return stats;
@@ -682,7 +686,7 @@ private OsStats.Cgroup getCgroup() {
682686
assert controllerMap.isEmpty() == false;
683687

684688
final String cpuAcctControlGroup;
685-
final long cgroupCpuAcctUsageNanos;
689+
final BigInteger cgroupCpuAcctUsageNanos;
686690
final long cgroupCpuAcctCpuCfsPeriodMicros;
687691
final long cgroupCpuAcctCpuCfsQuotaMicros;
688692
final String cpuControlGroup;
@@ -696,9 +700,11 @@ private OsStats.Cgroup getCgroup() {
696700
cpuControlGroup = cpuAcctControlGroup = memoryControlGroup = controllerMap.get("");
697701

698702
// `cpuacct` was merged with `cpu` in v2
699-
final Map<String, Long> cpuStatsMap = getCgroupV2CpuStats(cpuControlGroup);
703+
final Map<String, BigInteger> cpuStatsMap = getCgroupV2CpuStats(cpuControlGroup);
700704

701-
cgroupCpuAcctUsageNanos = cpuStatsMap.get("usage_usec") * 1000; // convert from micros to nanos
705+
final BigInteger THOUSAND = BigInteger.valueOf(1000);
706+
707+
cgroupCpuAcctUsageNanos = cpuStatsMap.get("usage_usec").multiply(THOUSAND); // convert from micros to nanos
702708

703709
long[] cpuLimits = getCgroupV2CpuLimit(cpuControlGroup);
704710
cgroupCpuAcctCpuCfsQuotaMicros = cpuLimits[0];
@@ -707,7 +713,7 @@ private OsStats.Cgroup getCgroup() {
707713
cpuStat = new OsStats.Cgroup.CpuStat(
708714
cpuStatsMap.get("nr_periods"),
709715
cpuStatsMap.get("nr_throttled"),
710-
cpuStatsMap.get("throttled_usec") * 1000
716+
cpuStatsMap.get("throttled_usec").multiply(THOUSAND)
711717
);
712718

713719
cgroupMemoryLimitInBytes = getCgroupV2MemoryLimitInBytes(memoryControlGroup);

server/src/main/java/org/elasticsearch/monitor/os/OsStats.java

Lines changed: 43 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import org.elasticsearch.xcontent.XContentBuilder;
2121

2222
import java.io.IOException;
23+
import java.math.BigInteger;
2324
import java.util.Arrays;
2425
import java.util.Objects;
2526

@@ -362,7 +363,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
362363
public static class Cgroup implements Writeable, ToXContentFragment {
363364

364365
private final String cpuAcctControlGroup;
365-
private final long cpuAcctUsageNanos;
366+
private final BigInteger cpuAcctUsageNanos;
366367
private final String cpuControlGroup;
367368
private final long cpuCfsPeriodMicros;
368369
private final long cpuCfsQuotaMicros;
@@ -387,7 +388,7 @@ public String getCpuAcctControlGroup() {
387388
*
388389
* @return the total CPU time in nanoseconds
389390
*/
390-
public long getCpuAcctUsageNanos() {
391+
public BigInteger getCpuAcctUsageNanos() {
391392
return cpuAcctUsageNanos;
392393
}
393394

@@ -465,7 +466,7 @@ public String getMemoryUsageInBytes() {
465466

466467
public Cgroup(
467468
final String cpuAcctControlGroup,
468-
final long cpuAcctUsageNanos,
469+
final BigInteger cpuAcctUsageNanos,
469470
final String cpuControlGroup,
470471
final long cpuCfsPeriodMicros,
471472
final long cpuCfsQuotaMicros,
@@ -487,7 +488,11 @@ public Cgroup(
487488

488489
Cgroup(final StreamInput in) throws IOException {
489490
cpuAcctControlGroup = in.readString();
490-
cpuAcctUsageNanos = in.readLong();
491+
if (in.getTransportVersion().onOrAfter(TransportVersions.CPU_STAT_STRING_PARSING)) {
492+
cpuAcctUsageNanos = in.readBigInteger();
493+
} else {
494+
cpuAcctUsageNanos = BigInteger.valueOf(in.readLong());
495+
}
491496
cpuControlGroup = in.readString();
492497
cpuCfsPeriodMicros = in.readLong();
493498
cpuCfsQuotaMicros = in.readLong();
@@ -500,7 +505,11 @@ public Cgroup(
500505
@Override
501506
public void writeTo(final StreamOutput out) throws IOException {
502507
out.writeString(cpuAcctControlGroup);
503-
out.writeLong(cpuAcctUsageNanos);
508+
if (out.getTransportVersion().onOrAfter(TransportVersions.CPU_STAT_STRING_PARSING)) {
509+
out.writeBigInteger(cpuAcctUsageNanos);
510+
} else {
511+
out.writeLong(cpuAcctUsageNanos.longValue());
512+
}
504513
out.writeString(cpuControlGroup);
505514
out.writeLong(cpuCfsPeriodMicros);
506515
out.writeLong(cpuCfsQuotaMicros);
@@ -551,17 +560,17 @@ public XContentBuilder toXContent(final XContentBuilder builder, final Params pa
551560
*/
552561
public static class CpuStat implements Writeable, ToXContentFragment {
553562

554-
private final long numberOfElapsedPeriods;
555-
private final long numberOfTimesThrottled;
556-
private final long timeThrottledNanos;
563+
private final BigInteger numberOfElapsedPeriods;
564+
private final BigInteger numberOfTimesThrottled;
565+
private final BigInteger timeThrottledNanos;
557566

558567
/**
559568
* The number of elapsed periods.
560569
*
561570
* @return the number of elapsed periods as measured by
562571
* {@code cpu.cfs_period_us}
563572
*/
564-
public long getNumberOfElapsedPeriods() {
573+
public BigInteger getNumberOfElapsedPeriods() {
565574
return numberOfElapsedPeriods;
566575
}
567576

@@ -571,7 +580,7 @@ public long getNumberOfElapsedPeriods() {
571580
*
572581
* @return the number of times
573582
*/
574-
public long getNumberOfTimesThrottled() {
583+
public BigInteger getNumberOfTimesThrottled() {
575584
return numberOfTimesThrottled;
576585
}
577586

@@ -581,27 +590,43 @@ public long getNumberOfTimesThrottled() {
581590
*
582591
* @return the total time in nanoseconds
583592
*/
584-
public long getTimeThrottledNanos() {
593+
public BigInteger getTimeThrottledNanos() {
585594
return timeThrottledNanos;
586595
}
587596

588-
public CpuStat(final long numberOfElapsedPeriods, final long numberOfTimesThrottled, final long timeThrottledNanos) {
597+
public CpuStat(
598+
final BigInteger numberOfElapsedPeriods,
599+
final BigInteger numberOfTimesThrottled,
600+
final BigInteger timeThrottledNanos
601+
) {
589602
this.numberOfElapsedPeriods = numberOfElapsedPeriods;
590603
this.numberOfTimesThrottled = numberOfTimesThrottled;
591604
this.timeThrottledNanos = timeThrottledNanos;
592605
}
593606

594607
CpuStat(final StreamInput in) throws IOException {
595-
numberOfElapsedPeriods = in.readLong();
596-
numberOfTimesThrottled = in.readLong();
597-
timeThrottledNanos = in.readLong();
608+
if (in.getTransportVersion().onOrAfter(TransportVersions.CPU_STAT_STRING_PARSING)) {
609+
numberOfElapsedPeriods = in.readBigInteger();
610+
numberOfTimesThrottled = in.readBigInteger();
611+
timeThrottledNanos = in.readBigInteger();
612+
} else {
613+
numberOfElapsedPeriods = BigInteger.valueOf(in.readLong());
614+
numberOfTimesThrottled = BigInteger.valueOf(in.readLong());
615+
timeThrottledNanos = BigInteger.valueOf(in.readLong());
616+
}
598617
}
599618

600619
@Override
601620
public void writeTo(final StreamOutput out) throws IOException {
602-
out.writeLong(numberOfElapsedPeriods);
603-
out.writeLong(numberOfTimesThrottled);
604-
out.writeLong(timeThrottledNanos);
621+
if (out.getTransportVersion().onOrAfter(TransportVersions.CPU_STAT_STRING_PARSING)) {
622+
out.writeBigInteger(numberOfElapsedPeriods);
623+
out.writeBigInteger(numberOfTimesThrottled);
624+
out.writeBigInteger(timeThrottledNanos);
625+
} else {
626+
out.writeLong(numberOfElapsedPeriods.longValue());
627+
out.writeLong(numberOfTimesThrottled.longValue());
628+
out.writeLong(timeThrottledNanos.longValue());
629+
}
605630
}
606631

607632
@Override

server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@
8585
import org.elasticsearch.xcontent.ToXContent;
8686

8787
import java.io.IOException;
88+
import java.math.BigInteger;
8889
import java.nio.file.Path;
8990
import java.util.ArrayList;
9091
import java.util.Collections;
@@ -709,11 +710,15 @@ public static NodeStats createNodeStats() {
709710
new OsStats.Swap(swapTotal, randomLongBetween(0, swapTotal)),
710711
new OsStats.Cgroup(
711712
randomAlphaOfLength(8),
712-
randomNonNegativeLong(),
713+
randomUnsignedLongBetween(BigInteger.ZERO, BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.TWO)),
713714
randomAlphaOfLength(8),
714715
randomNonNegativeLong(),
715716
randomNonNegativeLong(),
716-
new OsStats.Cgroup.CpuStat(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()),
717+
new OsStats.Cgroup.CpuStat(
718+
randomUnsignedLongBetween(BigInteger.ZERO, BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.TWO)),
719+
randomUnsignedLongBetween(BigInteger.ZERO, BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.TWO)),
720+
randomUnsignedLongBetween(BigInteger.ZERO, BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.TWO))
721+
),
717722
randomAlphaOfLength(8),
718723
Long.toString(randomNonNegativeLong()),
719724
Long.toString(randomNonNegativeLong())

server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -136,12 +136,12 @@ public void testOsStats() {
136136
if (Constants.LINUX) {
137137
if (stats.getCgroup() != null) {
138138
assertThat(stats.getCgroup().getCpuAcctControlGroup(), notNullValue());
139-
assertThat(stats.getCgroup().getCpuAcctUsageNanos(), greaterThan(0L));
139+
assertThat(stats.getCgroup().getCpuAcctUsageNanos(), greaterThan(BigInteger.ZERO));
140140
assertThat(stats.getCgroup().getCpuCfsQuotaMicros(), anyOf(equalTo(-1L), greaterThanOrEqualTo(0L)));
141141
assertThat(stats.getCgroup().getCpuCfsPeriodMicros(), greaterThanOrEqualTo(0L));
142-
assertThat(stats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(), greaterThanOrEqualTo(0L));
143-
assertThat(stats.getCgroup().getCpuStat().getNumberOfTimesThrottled(), greaterThanOrEqualTo(0L));
144-
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(0L));
142+
assertThat(stats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(), greaterThanOrEqualTo(BigInteger.ZERO));
143+
assertThat(stats.getCgroup().getCpuStat().getNumberOfTimesThrottled(), greaterThanOrEqualTo(BigInteger.ZERO));
144+
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(BigInteger.ZERO));
145145
// These could be null if transported from a node running an older version, but shouldn't be null on the current node
146146
assertThat(stats.getCgroup().getMemoryControlGroup(), notNullValue());
147147
String memoryLimitInBytes = stats.getCgroup().getMemoryLimitInBytes();
@@ -191,26 +191,26 @@ public void testCgroupProbe() {
191191
case 1 -> {
192192
assertNotNull(cgroup);
193193
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
194-
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
194+
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(new BigInteger("364869866063112")));
195195
assertThat(cgroup.getCpuControlGroup(), equalTo("/" + hierarchy));
196196
assertThat(cgroup.getCpuCfsPeriodMicros(), equalTo(100000L));
197197
assertThat(cgroup.getCpuCfsQuotaMicros(), equalTo(50000L));
198-
assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(17992L));
199-
assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(1311L));
200-
assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(139298645489L));
198+
assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(BigInteger.valueOf(17992)));
199+
assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(BigInteger.valueOf(1311)));
200+
assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(new BigInteger("139298645489")));
201201
assertThat(cgroup.getMemoryLimitInBytes(), equalTo("18446744073709551615"));
202202
assertThat(cgroup.getMemoryUsageInBytes(), equalTo("4796416"));
203203
}
204204
case 2 -> {
205205
assertNotNull(cgroup);
206206
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
207-
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063000L));
207+
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(new BigInteger("364869866063000")));
208208
assertThat(cgroup.getCpuControlGroup(), equalTo("/" + hierarchy));
209209
assertThat(cgroup.getCpuCfsPeriodMicros(), equalTo(100000L));
210210
assertThat(cgroup.getCpuCfsQuotaMicros(), equalTo(50000L));
211-
assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(17992L));
212-
assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(1311L));
213-
assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(139298645000L));
211+
assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(BigInteger.valueOf(17992)));
212+
assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(BigInteger.valueOf(1311)));
213+
assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(new BigInteger("139298645000")));
214214
assertThat(cgroup.getMemoryLimitInBytes(), equalTo("18446744073709551615"));
215215
assertThat(cgroup.getMemoryUsageInBytes(), equalTo("4796416"));
216216
}

0 commit comments

Comments
 (0)