Skip to content

Commit 9d4dfd4

Browse files
authored
Add metadata to identify Python and Ruby processes (#1868)
- Add identifier metadata for Ruby and Python - Introduce runtime package for common functionality depends: #1867 ### Why? This additional metadata helps us to query Python and Ruby specific profiles from executable of the running processes. Right now, we took a naive approach and check for the common symbols of interpreters across the versions. Further iterations will come. ### What? <!-- copilot:summary --> ### <samp>🤖 Generated by Copilot at 3dfad75</samp> This pull request adds metadata providers for Python and Ruby processes, and refactors the existing Java provider. It also introduces new functions in `pkg/runtime` to detect Python and Ruby processes based on ELF symbols. The `hsperfdata` package is moved and renamed to `java` in `pkg/runtime` to improve code organization. ### How? <!-- copilot:walkthrough --> ### <samp>🤖 Generated by Copilot at 3dfad75</samp> * Add metadata providers for Python and Ruby processes ([link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-57833d176ed681886b67a7fa2e4dc2f68d5e00cb1db269e7854fb3dafb45f54eL664-R666), [link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-b5bcdce21d9b76af9586cf79fa08cba39b460e8afdae697b0c372a660812fd31R1-R96), [link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-47648631ed7788443bfebfb8c3a943990bca4c75647c9c45416c349d483b0b55R1-R96)) * Rename `java_process.go` to `java.go` and `JavaProcess` to `Java` in `pkg/metadata` ([link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-35835b1e76e2185d9ae1138faf6c0b347ce295ca3f965b004c8e8b64ce34a8f3L24-R29)) * Move `hsperfdata.go` from `pkg/hsperfdata` to `pkg/runtime/java` and rename package to `java` ([link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-bffa53f69a05fb77178a7088b45cc22da039ad07dcf9f4de99762e508c620bc6L15-R15)) * Rename `Cache` to `HSPerfDataCache` and update related functions and methods 
in `pkg/runtime/java/hsperfdata.go` ([link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-bffa53f69a05fb77178a7088b45cc22da039ad07dcf9f4de99762e508c620bc6L37-R37), [link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-bffa53f69a05fb77178a7088b45cc22da039ad07dcf9f4de99762e508c620bc6L54-R55), [link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-bffa53f69a05fb77178a7088b45cc22da039ad07dcf9f4de99762e508c620bc6L67-R67), [link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-bffa53f69a05fb77178a7088b45cc22da039ad07dcf9f4de99762e508c620bc6L80-R80)) * Add `IsPython` and `IsRuby` functions to `pkg/runtime` to check ELF files for Python and Ruby identifiers ([link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-7ef2d85bbb21b340c4273db9f8c50c07d567cce478d6e2b2fbd3e8e5fe67d915R1-R70), [link](https://github.com/parca-dev/parca-agent/pull/1868/files?diff=unified&w=0#diff-e446dd52ecce11cbc98b8dc12cc36fff325aca8d376f2353192e153f59028e77R1-R72)) ### Test Plan 1. Local tests (using containers with different versions of interpreters) 2. CI integration tests ![CleanShot 2023-07-18 at 19 53 26](https://github.com/parca-dev/parca-agent/assets/536449/2b9a3af5-526e-4fb3-bb7a-52e068739d50) ![CleanShot 2023-07-18 at 19 53 08](https://github.com/parca-dev/parca-agent/assets/536449/5a336bb3-7bc7-40c6-b899-4de242425f06)
2 parents 574fc81 + ba96eae commit 9d4dfd4

File tree

7 files changed

+350
-10
lines changed

7 files changed

+350
-10
lines changed

cmd/parca-agent/main.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,9 @@ func run(logger log.Logger, reg *prometheus.Registry, flags flags) error {
661661
metadata.Target(flags.Node, flags.Metadata.ExternalLabels),
662662
metadata.Compiler(logger, reg, ofp),
663663
metadata.Process(pfs),
664-
metadata.JavaProcess(logger, nsCache),
664+
metadata.Java(logger, nsCache),
665+
metadata.Ruby(pfs, reg, ofp),
666+
metadata.Python(pfs, reg, ofp),
665667
metadata.System(),
666668
metadata.PodHosts(),
667669
},
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ import (
2121
"github.com/go-kit/log"
2222
"github.com/prometheus/common/model"
2323

24-
"github.com/parca-dev/parca-agent/pkg/hsperfdata"
2524
"github.com/parca-dev/parca-agent/pkg/namespace"
25+
"github.com/parca-dev/parca-agent/pkg/runtime/java"
2626
)
2727

28-
func JavaProcess(logger log.Logger, nsCache *namespace.Cache) Provider {
29-
cache := hsperfdata.NewCache(logger, nsCache)
28+
func Java(logger log.Logger, nsCache *namespace.Cache) Provider {
29+
cache := java.NewHSPerfDataCache(logger, nsCache)
3030

3131
return &StatelessProvider{"java process", func(ctx context.Context, pid int) (model.LabelSet, error) {
3232
if ctx.Err() != nil {

pkg/metadata/python.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Copyright 2022-2023 The Parca Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
//
14+
15+
//nolint:dupl
16+
package metadata
17+
18+
import (
19+
"context"
20+
"fmt"
21+
"strings"
22+
23+
"github.com/prometheus/client_golang/prometheus"
24+
"github.com/prometheus/common/model"
25+
"github.com/prometheus/procfs"
26+
27+
"github.com/parca-dev/parca-agent/pkg/cache"
28+
"github.com/parca-dev/parca-agent/pkg/objectfile"
29+
"github.com/parca-dev/parca-agent/pkg/runtime"
30+
)
31+
32+
func Python(procfs procfs.FS, reg prometheus.Registerer, objFilePool *objectfile.Pool) Provider {
33+
cache := cache.NewLRUCache[string, bool](
34+
prometheus.WrapRegistererWith(prometheus.Labels{"cache": "metadata_python"}, reg),
35+
512,
36+
)
37+
return &StatelessProvider{"python", func(ctx context.Context, pid int) (model.LabelSet, error) {
38+
if ctx.Err() != nil {
39+
return nil, ctx.Err()
40+
}
41+
42+
p, err := procfs.Proc(pid)
43+
if err != nil {
44+
return nil, fmt.Errorf("failed to instantiate procfs for PID %d: %w", pid, err)
45+
}
46+
47+
executable, err := p.Executable()
48+
if err != nil {
49+
return nil, fmt.Errorf("failed to get executable for PID %d: %w", pid, err)
50+
}
51+
52+
if python, ok := cache.Get(executable); ok {
53+
if !python {
54+
return nil, nil
55+
}
56+
return model.LabelSet{
57+
"python": model.LabelValue(fmt.Sprintf("%t", true)),
58+
}, nil
59+
}
60+
61+
comm, err := p.Comm()
62+
if err != nil {
63+
return nil, fmt.Errorf("failed to get comm for PID %d: %w", pid, err)
64+
}
65+
66+
if strings.HasPrefix(comm, "python") {
67+
cache.Add(executable, true)
68+
return model.LabelSet{
69+
"python": model.LabelValue(fmt.Sprintf("%t", true)),
70+
}, nil
71+
}
72+
73+
obj, err := objFilePool.Open(executable)
74+
if err != nil {
75+
return nil, fmt.Errorf("failed to open ELF file for process %d: %w", pid, err)
76+
}
77+
78+
ef, release, err := obj.ELF()
79+
if err != nil {
80+
return nil, fmt.Errorf("failed to get ELF file for process %d: %w", pid, err)
81+
}
82+
defer release()
83+
84+
python, err := runtime.IsPython(ef)
85+
if err != nil {
86+
return nil, fmt.Errorf("failed to determine if PID %d belongs to a python process: %w", pid, err)
87+
}
88+
89+
cache.Add(executable, python)
90+
if !python {
91+
return nil, nil
92+
}
93+
return model.LabelSet{
94+
"python": model.LabelValue(fmt.Sprintf("%t", true)),
95+
}, nil
96+
}}
97+
}

pkg/metadata/ruby.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Copyright 2022-2023 The Parca Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
//
14+
15+
//nolint:dupl
16+
package metadata
17+
18+
import (
19+
"context"
20+
"fmt"
21+
"strings"
22+
23+
"github.com/prometheus/client_golang/prometheus"
24+
"github.com/prometheus/common/model"
25+
"github.com/prometheus/procfs"
26+
27+
"github.com/parca-dev/parca-agent/pkg/cache"
28+
"github.com/parca-dev/parca-agent/pkg/objectfile"
29+
"github.com/parca-dev/parca-agent/pkg/runtime"
30+
)
31+
32+
func Ruby(procfs procfs.FS, reg prometheus.Registerer, objFilePool *objectfile.Pool) Provider {
33+
cache := cache.NewLRUCache[string, bool](
34+
prometheus.WrapRegistererWith(prometheus.Labels{"cache": "metadata_ruby"}, reg),
35+
512,
36+
)
37+
return &StatelessProvider{"ruby", func(ctx context.Context, pid int) (model.LabelSet, error) {
38+
if ctx.Err() != nil {
39+
return nil, ctx.Err()
40+
}
41+
42+
p, err := procfs.Proc(pid)
43+
if err != nil {
44+
return nil, fmt.Errorf("failed to instantiate procfs for PID %d: %w", pid, err)
45+
}
46+
47+
executable, err := p.Executable()
48+
if err != nil {
49+
return nil, fmt.Errorf("failed to get executable for PID %d: %w", pid, err)
50+
}
51+
52+
if ruby, ok := cache.Get(executable); ok {
53+
if !ruby {
54+
return nil, nil
55+
}
56+
return model.LabelSet{
57+
"ruby": model.LabelValue(fmt.Sprintf("%t", true)),
58+
}, nil
59+
}
60+
61+
comm, err := p.Comm()
62+
if err != nil {
63+
return nil, fmt.Errorf("failed to get comm for PID %d: %w", pid, err)
64+
}
65+
66+
if strings.HasPrefix(comm, "ruby") {
67+
cache.Add(executable, true)
68+
return model.LabelSet{
69+
"ruby": model.LabelValue(fmt.Sprintf("%t", true)),
70+
}, nil
71+
}
72+
73+
obj, err := objFilePool.Open(executable)
74+
if err != nil {
75+
return nil, fmt.Errorf("failed to open ELF file for process %d: %w", pid, err)
76+
}
77+
78+
ef, release, err := obj.ELF()
79+
if err != nil {
80+
return nil, fmt.Errorf("failed to get ELF file for process %d: %w", pid, err)
81+
}
82+
defer release()
83+
84+
ruby, err := runtime.IsRuby(ef)
85+
if err != nil {
86+
return nil, fmt.Errorf("failed to determine if PID %d belongs to a ruby process: %w", pid, err)
87+
}
88+
89+
cache.Add(executable, ruby)
90+
if !ruby {
91+
return nil, nil
92+
}
93+
return model.LabelSet{
94+
"ruby": model.LabelValue(fmt.Sprintf("%t", true)),
95+
}, nil
96+
}}
97+
}
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// limitations under the License.
1313
//
1414

15-
package hsperfdata
15+
package java
1616

1717
import (
1818
"errors"
@@ -34,7 +34,7 @@ import (
3434

3535
const hsperfdata = "/tmp/hsperfdata_*"
3636

37-
type Cache struct {
37+
type HSPerfDataCache struct {
3838
fs fs.FS
3939
logger log.Logger
4040

@@ -51,8 +51,8 @@ func (f *realfs) Open(name string) (fs.File, error) {
5151
return os.Open(name)
5252
}
5353

54-
func NewCache(logger log.Logger, nsCache *namespace.Cache) *Cache {
55-
return &Cache{
54+
func NewHSPerfDataCache(logger log.Logger, nsCache *namespace.Cache) *HSPerfDataCache {
55+
return &HSPerfDataCache{
5656
fs: &realfs{},
5757
logger: logger,
5858

@@ -64,7 +64,7 @@ func NewCache(logger log.Logger, nsCache *namespace.Cache) *Cache {
6464
}
6565
}
6666

67-
func (c *Cache) Exists(pid int) bool {
67+
func (c *HSPerfDataCache) Exists(pid int) bool {
6868
c.mu.Lock()
6969
defer c.mu.Unlock()
7070

@@ -77,7 +77,7 @@ func (c *Cache) Exists(pid int) bool {
7777
// running on host and then searches in /proc/{pid}/root/tmp for processes
7878
// running in containers. Note that pids are assumed to be unique regardless
7979
// of username.
80-
func (c *Cache) IsJavaProcess(pid int) (bool, error) {
80+
func (c *HSPerfDataCache) IsJavaProcess(pid int) (bool, error) {
8181
// Check if the pid is in the cache.
8282
if c.Exists(pid) {
8383
return true, nil

pkg/runtime/python.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright 2022-2023 The Parca Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
//
14+
15+
package runtime
16+
17+
import (
18+
"debug/elf"
19+
"errors"
20+
"fmt"
21+
)
22+
23+
// IsPython reports whether the ELF file ef looks like a Python interpreter.
//
// It searches the static symbol table first and, if nothing matches, the
// dynamic symbol table for one of the interpreter entry points accepted by
// isPythonIdentifyingSymbol.
//
// A missing symbol table (elf.ErrNoSymbols) in either place means "nothing to
// match", not a failure: a binary without a dynamic symbol section, such as a
// statically linked executable, yields (false, nil). Any other error from
// reading a symbol table is returned wrapped.
func IsPython(ef *elf.File) (bool, error) {
	syms, err := ef.Symbols()
	if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
		return false, fmt.Errorf("failed to get symbols: %w", err)
	}
	if hasPythonIdentifyingSymbol(syms) {
		return true, nil
	}

	dynSyms, err := ef.DynamicSymbols()
	// Tolerate a missing .dynsym the same way a missing .symtab is tolerated
	// above; otherwise every non-dynamic executable would be reported as an
	// error instead of simply "not Python".
	if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
		return false, fmt.Errorf("failed to get dynamic symbols: %w", err)
	}
	return hasPythonIdentifyingSymbol(dynSyms), nil
}

// hasPythonIdentifyingSymbol reports whether any symbol in syms carries a name
// accepted by isPythonIdentifyingSymbol. A nil or empty slice yields false.
func hasPythonIdentifyingSymbol(syms []elf.Symbol) bool {
	for i := range syms {
		if isPythonIdentifyingSymbol(syms[i].Name) {
			return true
		}
	}
	return false
}

// isPythonIdentifyingSymbol reports whether sym is one of the interpreter
// entry-point symbols used to recognise a Python binary.
//
// Symbols looked for, by Python version:
//
//	2.7:          Py_Main
//	3.2 - 3.6:    Py_Main
//	3.7:          _Py_UnixMain
//	3.8 - 3.11:   Py_BytesMain
func isPythonIdentifyingSymbol(sym string) bool {
	switch sym {
	case "Py_Main", "_Py_UnixMain", "Py_BytesMain":
		return true
	default:
		return false
	}
}

0 commit comments

Comments
 (0)