
Commit e6e2d22

server: expose Go soft memory limit as a metric
If Go memory usage gets close to the soft memory limit, performance can plummet because the GC has to run constantly. This commit adds a metric that reports the soft memory limit, so it is easy to see when the Go total and allocated memory graphs approach it.

Epic: none
Release note: None
1 parent 5798d47 commit e6e2d22
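
For context on the "soft memory limit" referenced above: it is the Go runtime's GOMEMLIMIT, set or queried via runtime/debug.SetMemoryLimit. Below is a minimal standalone sketch (not part of this commit) that reads the current limit; passing a negative value queries it without modifying it, and the default of math.MaxInt64 means no limit is configured.

package main

import (
	"fmt"
	"math"
	"runtime/debug"
)

func main() {
	// A negative argument returns the current soft memory limit
	// without changing it.
	limit := debug.SetMemoryLimit(-1)
	if limit == math.MaxInt64 {
		fmt.Println("no Go soft memory limit configured")
		return
	}
	fmt.Printf("Go soft memory limit: %d bytes\n", limit)
}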

7 files changed (+42, -0 lines)

docs/generated/eventlog.md

Lines changed: 1 addition & 0 deletions
@@ -522,6 +522,7 @@ An event of type `runtime_stats` is recorded every 10 seconds as server health m
 | `GCRunCount` | The total number of GC runs. | no |
 | `NetHostRecvBytes` | The bytes received on all network interfaces since this process started. | no |
 | `NetHostSendBytes` | The bytes sent on all network interfaces since this process started. | no |
+| `GoLimitBytes` | The soft Go memory limit in bytes. | no |


 #### Common fields

docs/generated/metrics/metrics.yaml

Lines changed: 8 additions & 0 deletions
@@ -10366,6 +10366,14 @@ layers:
     unit: BYTES
     aggregation: AVG
     derivative: NONE
+  - name: sys.go.limitbytes
+    exported_name: sys_go_limitbytes
+    description: Go soft memory limit
+    y_axis_label: Memory
+    type: GAUGE
+    unit: BYTES
+    aggregation: AVG
+    derivative: NONE
   - name: sys.go.pause.other.ns
     exported_name: sys_go_pause_other_ns
     description: Estimated non-GC-related total pause time

pkg/roachprod/opentelemetry/cockroachdb_metrics.go

Lines changed: 1 addition & 0 deletions
@@ -2044,6 +2044,7 @@ var cockroachdbMetrics = map[string]string{
 	"sys_go_heap_heapfragmentbytes": "sys.go.heap.heapfragmentbytes",
 	"sys_go_heap_heapreleasedbytes": "sys.go.heap.heapreleasedbytes",
 	"sys_go_heap_heapreservedbytes": "sys.go.heap.heapreservedbytes",
+	"sys_go_limitbytes": "sys.go.limitbytes",
 	"sys_go_pause_other_ns": "sys.go.pause.other.ns",
 	"sys_go_stack_systembytes": "sys.go.stack.systembytes",
 	"sys_go_stop_other_ns": "sys.go.stop.other.ns",

pkg/server/status/runtime.go

Lines changed: 18 additions & 0 deletions
@@ -67,6 +67,12 @@ var (
 		Measurement: "Memory",
 		Unit: metric.Unit_BYTES,
 	}
+	metaGoLimitBytes = metric.Metadata{
+		Name: "sys.go.limitbytes",
+		Help: "Go soft memory limit",
+		Measurement: "Memory",
+		Unit: metric.Unit_BYTES,
+	}
 	metaGoMemStackSysBytes = metric.Metadata{
 		Name: "sys.go.stack.systembytes",
 		Help: "Stack memory obtained from the OS.",
@@ -591,13 +597,17 @@ const runtimeMetricMemStackOSBytes = "/memory/classes/os-stacks:bytes"
 // metrics in /memory/classes.
 const runtimeMetricGoTotal = "/memory/classes/total:bytes"

+// Current soft memory limit (see debug.SetMemoryLimit).
+const runtimeMetricGoLimit = "/gc/gomemlimit:bytes"
+
 // Count of all completed GC cycles.
 const runtimeMetricGCCount = "/gc/cycles/total:gc-cycles"

 var runtimeMetrics = []string{
 	runtimeMetricGCAssist,
 	runtimeMetricGoTotal,
 	runtimeMetricHeapAlloc,
+	runtimeMetricGoLimit,
 	runtimeMetricHeapFragmentBytes,
 	runtimeMetricHeapReservedBytes,
 	runtimeMetricHeapReleasedBytes,
@@ -774,6 +784,7 @@ type RuntimeStatSampler struct {
 	RunnableGoroutinesPerCPU *metric.GaugeFloat64
 	GoAllocBytes *metric.Gauge
 	GoTotalBytes *metric.Gauge
+	GoLimitBytes *metric.Gauge
 	GoMemStackSysBytes *metric.Gauge
 	GoHeapFragmentBytes *metric.Gauge
 	GoHeapReservedBytes *metric.Gauge
@@ -875,6 +886,7 @@ func NewRuntimeStatSampler(ctx context.Context, clock hlc.WallClock) *RuntimeSta
 		RunnableGoroutinesPerCPU: metric.NewGaugeFloat64(metaRunnableGoroutinesPerCPU),
 		GoAllocBytes: metric.NewGauge(metaGoAllocBytes),
 		GoTotalBytes: metric.NewGauge(metaGoTotalBytes),
+		GoLimitBytes: metric.NewGauge(metaGoLimitBytes),
 		GoMemStackSysBytes: metric.NewGauge(metaGoMemStackSysBytes),
 		GoHeapFragmentBytes: metric.NewGauge(metaGoHeapFragmentBytes),
 		GoHeapReservedBytes: metric.NewGauge(metaGoHeapReservedBytes),
@@ -1136,6 +1148,10 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem
 	goAlloc := rsr.goRuntimeSampler.uint64(runtimeMetricHeapAlloc)
 	goTotal := rsr.goRuntimeSampler.uint64(runtimeMetricGoTotal) -
 		rsr.goRuntimeSampler.uint64(runtimeMetricHeapReleasedBytes)
+	goLimit := rsr.goRuntimeSampler.uint64(runtimeMetricGoLimit)
+	if goLimit == math.MaxInt64 {
+		goLimit = 0
+	}
 	stackTotal := rsr.goRuntimeSampler.uint64(runtimeMetricMemStackHeapBytes) +
 		osStackBytes
 	heapFragmentBytes := rsr.goRuntimeSampler.uint64(runtimeMetricHeapFragmentBytes)
@@ -1147,6 +1163,7 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem
 		MemStackSysBytes: stackTotal,
 		GoAllocBytes: goAlloc,
 		GoTotalBytes: goTotal,
+		GoLimitBytes: goLimit,
 		HeapFragmentBytes: heapFragmentBytes,
 		HeapReservedBytes: heapReservedBytes,
 		HeapReleasedBytes: heapReleasedBytes,
@@ -1168,6 +1185,7 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem

 	rsr.GoAllocBytes.Update(int64(goAlloc))
 	rsr.GoTotalBytes.Update(int64(goTotal))
+	rsr.GoLimitBytes.Update(int64(goLimit))
 	rsr.GoMemStackSysBytes.Update(int64(osStackBytes))
 	rsr.GoHeapFragmentBytes.Update(int64(heapFragmentBytes))
 	rsr.GoHeapReservedBytes.Update(int64(heapReservedBytes))
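
The sampling hunks above read the limit through the runtime/metrics sample /gc/gomemlimit:bytes and map the "unlimited" sentinel (math.MaxInt64) to zero, so the gauge reports 0 when no limit is set. A minimal standalone sketch of that read path, independent of CockroachDB's sampler types and assuming a Go version that exposes /gc/gomemlimit:bytes:

package main

import (
	"fmt"
	"math"
	"runtime/metrics"
)

func main() {
	// Sample the runtime's soft memory limit.
	samples := []metrics.Sample{{Name: "/gc/gomemlimit:bytes"}}
	metrics.Read(samples)

	goLimit := samples[0].Value.Uint64()
	if goLimit == math.MaxInt64 {
		// No soft memory limit configured; report 0 rather than the sentinel.
		goLimit = 0
	}
	fmt.Printf("sys.go.limitbytes would report: %d\n", goLimit)
}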

pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/runtime.tsx

Lines changed: 3 additions & 0 deletions
@@ -51,6 +51,8 @@ export default function (props: GraphDashboardProps) {
       <dd>Memory allocated by the Go layer</dd>
       <dt>Go Total</dt>
       <dd>Total memory managed by the Go layer</dd>
+      <dt>Go Limit</dt>
+      <dd>Go soft memory limit</dd>
       <dt>C Allocated</dt>
       <dd>Memory allocated by the C layer</dd>
       <dt>C Total</dt>
@@ -64,6 +66,7 @@ export default function (props: GraphDashboardProps) {
       <Metric name="cr.node.sys.rss" title="Total memory (RSS)" />
       <Metric name="cr.node.sys.go.allocbytes" title="Go Allocated" />
       <Metric name="cr.node.sys.go.totalbytes" title="Go Total" />
+      <Metric name="cr.node.sys.go.limitbytes" title="Go Limit" />
       <Metric name="cr.node.sys.cgo.allocbytes" title="CGo Allocated" />
       <Metric name="cr.node.sys.cgo.totalbytes" title="CGo Total" />
     </Axis>

pkg/util/log/eventpb/health_events.proto

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,8 @@ message RuntimeStats {
   uint64 net_host_recv_bytes = 18 [(gogoproto.jsontag) = ",omitempty"];
   // The bytes sent on all network interfaces since this process started.
   uint64 net_host_send_bytes = 19 [(gogoproto.jsontag) = ",omitempty"];
+  // The soft Go memory limit in bytes.
+  uint64 go_limit_bytes = 20 [(gogoproto.jsontag) = ",omitempty"];
 }

 // HotRangesStats

pkg/util/log/eventpb/json_encode_generated.go

Lines changed: 9 additions & 0 deletions
(Generated file; diff not rendered by default.)
