Skip to content

Commit 8b865f2

Browse files
committed
Re-read process environment if it is empty
commit_hash:76c5493a0ae35b13260c269dafb8f49a0a5f2ed8
1 parent 8f3430d commit 8b865f2

File tree

1 file changed

+51
-6
lines changed
  • perforator/agent/collector/pkg/process

1 file changed

+51
-6
lines changed

perforator/agent/collector/pkg/process/map.go

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"sync/atomic"
1212
"time"
1313

14+
"github.com/cenkalti/backoff/v4"
1415
"golang.org/x/exp/maps"
1516
"golang.org/x/sync/errgroup"
1617

@@ -64,6 +65,9 @@ type processRegistryMetrics struct {
6465
mappingsFailedScheduleUpload metrics.Counter
6566
mappingsFailedNameToHandleAt metrics.Counter
6667
mappingsFailedELFVaddrRetrieval metrics.Counter
68+
69+
processesWithEmptyEnvironment metrics.Counter
70+
processEnvironmentWaitDelay metrics.Counter
6771
}
6872

6973
type mappingImpl struct {
@@ -215,6 +219,8 @@ func NewProcessRegistry(
215219
mappingsFailedScheduleUpload: m.WithTags(map[string]string{"kind": "failed_schedule_upload"}).Counter("mappings.count"),
216220
mappingsFailedNameToHandleAt: m.WithTags(map[string]string{"kind": "failed_name_to_handle_at"}).Counter("mappings.count"),
217221
mappingsFailedELFVaddrRetrieval: m.WithTags(map[string]string{"kind": "failed_elf_vaddr_retrieval"}).Counter("mappings.count"),
222+
processesWithEmptyEnvironment: m.Counter("processes.with_empty_environment.count"),
223+
processEnvironmentWaitDelay: m.Counter("environment.wait_delay.total.milliseconds"),
218224
},
219225
processScanner: processScanner,
220226
listeners: listeners,
@@ -896,13 +902,52 @@ func iterateMappingLPMSegments(m Mapping, callback func(address uint64, prefix u
896902
////////////////////////////////////////////////////////////////////////////////
897903

898904
func (a *processAnalyzer) loadEnvs(ctx context.Context) error {
899-
envs, err := procfs.Process(a.proc.currentNamespaceID).ListEnvs()
900-
if err != nil {
901-
return err
902-
}
905+
proc := procfs.Process(a.proc.currentNamespaceID)
906+
backoff := backoff.NewExponentialBackOff(
907+
backoff.WithInitialInterval(1*time.Millisecond),
908+
backoff.WithMultiplier(2),
909+
backoff.WithMaxElapsedTime(1*time.Second),
910+
)
911+
backoff.Reset()
912+
defer func() {
913+
a.reg.metrics.processEnvironmentWaitDelay.Add(backoff.GetElapsedTime().Milliseconds())
914+
}()
915+
// TODO(PERFORATOR-1102): loop here is hacky attempt to work around some
916+
// race conditions when we fail to observe correct process environment shortly after process creation.
917+
for i := 0; ; i++ {
918+
envs, err := proc.ListEnvs()
919+
if err != nil {
920+
return err
921+
}
903922

904-
a.log.Debug(ctx, "Put process envs", log.Int("env_count", len(envs)))
905-
a.proc.setEnvs(envs)
923+
if len(envs) > 0 {
924+
a.log.Debug(
925+
ctx,
926+
"Put process envs",
927+
log.Int("env_count", len(envs)),
928+
log.Int("attempts", i),
929+
)
930+
a.proc.setEnvs(envs)
931+
break
932+
}
933+
934+
// we read empty environment.
935+
// While this is technically possible, it is more likely a race
936+
// with a newly created process.
937+
sleepFor := backoff.NextBackOff()
938+
if sleepFor == backoff.Stop {
939+
// Level is not DEBUG because it is the only sign of a possible race
940+
// and processes with actually empty environment are likely to be rare.
941+
a.log.Info(ctx, "Process seems to have empty environment")
942+
a.reg.metrics.processesWithEmptyEnvironment.Inc()
943+
break
944+
}
945+
select {
946+
case <-ctx.Done():
947+
return fmt.Errorf("canceled while obtaining process environment: %w", context.Cause(ctx))
948+
case <-time.After(sleepFor):
949+
}
950+
}
906951
return nil
907952
}
908953

0 commit comments

Comments
 (0)