Skip to content

Commit 3bfa553

Browse files
authored
fix(tracking): track children processes' memory (#856)
1 parent 3e37df1 commit 3bfa553

File tree

1 file changed

+26
-10
lines changed

1 file changed

+26
-10
lines changed

pkg/service/process.go

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ func NewProcessStatService(cfg config.RateLimitConfig) *ProcessStatService {
8282
}
8383

8484
// TrackProcess starts a new goroutine to keep track of the process.
85+
//
86+
// We need to track all child-processes alongside the main PID we're given.
8587
func (p *ProcessStatService) TrackProcess(ctx context.Context, pid int32) {
8688
go func() {
8789
logger := p.log.With("pid", pid)
@@ -94,18 +96,18 @@ func (p *ProcessStatService) TrackProcess(ctx context.Context, pid int32) {
9496
return
9597
}
9698

97-
peakMemory := 0
99+
var peakMemory int64
98100
defer func() {
99101
// We only take the lock once per process. This reduces contention significantly.
100102
p.mu.Lock()
101103
defer p.mu.Unlock()
102104

103105
if p.PeakMemory == 0 {
104-
p.PeakMemory = int64(peakMemory)
106+
p.PeakMemory = peakMemory
105107
} else {
106-
p.PeakMemory = (p.PeakMemory*(p.cfg.TrackerDecay-1) + int64(peakMemory)) / p.cfg.TrackerDecay
108+
p.PeakMemory = (p.PeakMemory*(p.cfg.TrackerDecay-1) + peakMemory) / p.cfg.TrackerDecay
107109
}
108-
p.MaxMemory = max(p.MaxMemory, int64(peakMemory))
110+
p.MaxMemory = max(p.MaxMemory, peakMemory)
109111

110112
MetricProcessMaxMemory.Set(float64(p.MaxMemory))
111113
MetricProcessPeakMemoryAverage.Set(float64(p.PeakMemory))
@@ -119,15 +121,29 @@ func (p *ProcessStatService) TrackProcess(ctx context.Context, pid int32) {
119121
case <-time.After(p.cfg.TrackerInterval):
120122
}
121123

122-
mem, err := proc.MemoryInfoWithContext(ctx)
123-
if errors.Is(err, context.Canceled) || errors.Is(err, process.ErrorProcessNotRunning) {
124-
return
125-
} else if err != nil {
126-
logger.Warn("failed to find memory info about process", "err", err)
124+
if running, _ := proc.IsRunningWithContext(ctx); !running {
127125
return
128126
}
129127

130-
peakMemory = max(peakMemory, int(mem.RSS))
128+
peakMemory = max(peakMemory, recursiveMemory(ctx, proc))
131129
}
132130
}()
133131
}
132+
133+
// recursiveMemory calculates the total memory used by a process and all its children.
134+
// This is a best-effort function and may return partial results if processes exit while being queried, or are inaccessible to the current process.
135+
// We don't return any errors, and silently will just return a bad value if this is the case. This is good _enough_ for our use case.
136+
func recursiveMemory(ctx context.Context, proc *process.Process) int64 {
137+
mem, err := proc.MemoryInfoWithContext(ctx)
138+
if err != nil {
139+
return 0
140+
}
141+
142+
sum := int64(mem.RSS)
143+
// We don't care about errors here. If we get no children, we'll just move on.
144+
children, _ := proc.ChildrenWithContext(ctx)
145+
for _, child := range children {
146+
sum += recursiveMemory(ctx, child)
147+
}
148+
return sum
149+
}

0 commit comments

Comments
 (0)