@@ -82,6 +82,8 @@ func NewProcessStatService(cfg config.RateLimitConfig) *ProcessStatService {
8282}
8383
8484// TrackProcess starts a new goroutine to keep track of the process.
85+ //
86+ // We need to track all child-processes alongside the main PID we're given.
8587func (p * ProcessStatService ) TrackProcess (ctx context.Context , pid int32 ) {
8688 go func () {
8789 logger := p .log .With ("pid" , pid )
@@ -94,18 +96,18 @@ func (p *ProcessStatService) TrackProcess(ctx context.Context, pid int32) {
9496 return
9597 }
9698
97- peakMemory := 0
99+ var peakMemory int64
98100 defer func () {
99101 // We only do the lock once per process. This reduces contention significantly.
100102 p .mu .Lock ()
101103 defer p .mu .Unlock ()
102104
103105 if p .PeakMemory == 0 {
104- p .PeakMemory = int64 ( peakMemory )
106+ p .PeakMemory = peakMemory
105107 } else {
106- p .PeakMemory = (p .PeakMemory * (p .cfg .TrackerDecay - 1 ) + int64 ( peakMemory ) ) / p .cfg .TrackerDecay
108+ p .PeakMemory = (p .PeakMemory * (p .cfg .TrackerDecay - 1 ) + peakMemory ) / p .cfg .TrackerDecay
107109 }
108- p .MaxMemory = max (p .MaxMemory , int64 ( peakMemory ) )
110+ p .MaxMemory = max (p .MaxMemory , peakMemory )
109111
110112 MetricProcessMaxMemory .Set (float64 (p .MaxMemory ))
111113 MetricProcessPeakMemoryAverage .Set (float64 (p .PeakMemory ))
@@ -119,15 +121,29 @@ func (p *ProcessStatService) TrackProcess(ctx context.Context, pid int32) {
119121 case <- time .After (p .cfg .TrackerInterval ):
120122 }
121123
122- mem , err := proc .MemoryInfoWithContext (ctx )
123- if errors .Is (err , context .Canceled ) || errors .Is (err , process .ErrorProcessNotRunning ) {
124- return
125- } else if err != nil {
126- logger .Warn ("failed to find memory info about process" , "err" , err )
124+ if running , _ := proc .IsRunningWithContext (ctx ); ! running {
127125 return
128126 }
129127
130- peakMemory = max (peakMemory , int ( mem . RSS ))
128+ peakMemory = max (peakMemory , recursiveMemory ( ctx , proc ))
131129 }
132130 }()
133131}
132+
133+ // recursiveMemory calculates the total memory used by a process and all its children.
134+ // This is a best-effort function and may return partial results if processes exit while being queried, or are inaccessible to the current process.
135+ // We don't return any errors, and silently will just return a bad value if this is the case. This is good _enough_ for our use case.
136+ func recursiveMemory (ctx context.Context , proc * process.Process ) int64 {
137+ mem , err := proc .MemoryInfoWithContext (ctx )
138+ if err != nil {
139+ return 0
140+ }
141+
142+ sum := int64 (mem .RSS )
143+ // We don't care about errors here. If we get no children, we'll just move on.
144+ children , _ := proc .ChildrenWithContext (ctx )
145+ for _ , child := range children {
146+ sum += recursiveMemory (ctx , child )
147+ }
148+ return sum
149+ }
0 commit comments