Skip to content

Commit e007f2d

Browse files
authored
Enable requesting flares with profiles via Remote Config (#31398)
1 parent 4abf43e commit e007f2d

File tree

27 files changed

+967
-249
lines changed

27 files changed

+967
-249
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@
319319
/comp/core/configsync @DataDog/agent-configuration
320320
/comp/core/flare @DataDog/agent-configuration
321321
/comp/core/gui @DataDog/agent-configuration
322+
/comp/core/profiler @DataDog/agent-configuration
322323
/comp/core/secrets @DataDog/agent-configuration
323324
/comp/core/settings @DataDog/agent-configuration
324325
/comp/core/status @DataDog/agent-configuration

cmd/agent/subcommands/flare/command.go

Lines changed: 27 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,20 @@ import (
1010
"bytes"
1111
"encoding/json"
1212
"fmt"
13-
"io"
1413
"net"
15-
"net/http"
1614
"net/url"
1715
"os"
1816
"path"
1917
"strconv"
2018
"time"
2119

2220
"github.com/fatih/color"
23-
"github.com/hashicorp/go-multierror"
2421
"github.com/spf13/cobra"
2522
"go.uber.org/fx"
2623

2724
"github.com/DataDog/datadog-agent/cmd/agent/command"
2825
"github.com/DataDog/datadog-agent/cmd/agent/common"
2926
"github.com/DataDog/datadog-agent/cmd/agent/subcommands/streamlogs"
30-
sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
3127
"github.com/DataDog/datadog-agent/comp/aggregator/diagnosesendermanager/diagnosesendermanagerimpl"
3228
authtokenimpl "github.com/DataDog/datadog-agent/comp/api/authtoken/fetchonlyimpl"
3329
"github.com/DataDog/datadog-agent/comp/collector/collector"
@@ -36,8 +32,13 @@ import (
3632
"github.com/DataDog/datadog-agent/comp/core/config"
3733
"github.com/DataDog/datadog-agent/comp/core/flare"
3834
"github.com/DataDog/datadog-agent/comp/core/flare/helpers"
35+
flaretypes "github.com/DataDog/datadog-agent/comp/core/flare/types"
3936
log "github.com/DataDog/datadog-agent/comp/core/log/def"
37+
flareprofilerdef "github.com/DataDog/datadog-agent/comp/core/profiler/def"
38+
flareprofilerfx "github.com/DataDog/datadog-agent/comp/core/profiler/fx"
4039
"github.com/DataDog/datadog-agent/comp/core/secrets"
40+
coresettings "github.com/DataDog/datadog-agent/comp/core/settings"
41+
"github.com/DataDog/datadog-agent/comp/core/settings/settingsimpl"
4142
"github.com/DataDog/datadog-agent/comp/core/sysprobeconfig"
4243
"github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/sysprobeconfigimpl"
4344
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
@@ -124,12 +125,23 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command {
124125
defaultpaths.DogstatsDLogFile,
125126
defaultpaths.StreamlogsLogFile,
126127
)),
128+
flareprofilerfx.Module(),
127129
// workloadmeta setup
128130
wmcatalog.GetCatalog(),
129131
workloadmetafx.Module(workloadmeta.Params{
130132
AgentType: workloadmeta.NodeAgent,
131133
InitHelper: common.GetWorkloadmetaInit(),
132134
}),
135+
fx.Provide(func(config config.Component) coresettings.Params {
136+
return coresettings.Params{
137+
// A settings object is required to populate some dependencies, but
138+
// no values are valid since the flare runs by default in a separate
139+
// process from the main agent.
140+
Settings: map[string]coresettings.RuntimeSetting{},
141+
Config: config,
142+
}
143+
}),
144+
settingsimpl.Module(),
133145
localTaggerfx.Module(tagger.Params{}),
134146
autodiscoveryimpl.Module(),
135147
fx.Supply(option.None[collector.Component]()),
@@ -171,147 +183,16 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command {
171183
return []*cobra.Command{flareCmd}
172184
}
173185

174-
func readProfileData(seconds int) (flare.ProfileData, error) {
175-
type agentProfileCollector func(service string) error
176-
177-
pdata := flare.ProfileData{}
178-
c := util.GetClient(false)
179-
180-
type pprofGetter func(path string) ([]byte, error)
181-
182-
tcpGet := func(portConfig string, onHTTPS bool) pprofGetter {
183-
endpoint := url.URL{
184-
Scheme: "http",
185-
Host: net.JoinHostPort("127.0.0.1", strconv.Itoa(pkgconfigsetup.Datadog().GetInt(portConfig))),
186-
Path: "/debug/pprof",
187-
}
188-
if onHTTPS {
189-
endpoint.Scheme = "https"
190-
}
191-
192-
return func(path string) ([]byte, error) {
193-
return util.DoGet(c, endpoint.String()+path, util.LeaveConnectionOpen)
194-
}
195-
}
196-
197-
serviceProfileCollector := func(get func(url string) ([]byte, error), seconds int) agentProfileCollector {
198-
return func(service string) error {
199-
fmt.Fprintln(color.Output, color.BlueString("Getting a %ds profile snapshot from %s.", seconds, service))
200-
for _, prof := range []struct{ name, path string }{
201-
{
202-
// 1st heap profile
203-
name: service + "-1st-heap.pprof",
204-
path: "/heap",
205-
},
206-
{
207-
// CPU profile
208-
name: service + "-cpu.pprof",
209-
path: fmt.Sprintf("/profile?seconds=%d", seconds),
210-
},
211-
{
212-
// 2nd heap profile
213-
name: service + "-2nd-heap.pprof",
214-
path: "/heap",
215-
},
216-
{
217-
// mutex profile
218-
name: service + "-mutex.pprof",
219-
path: "/mutex",
220-
},
221-
{
222-
// goroutine blocking profile
223-
name: service + "-block.pprof",
224-
path: "/block",
225-
},
226-
{
227-
// Trace
228-
name: service + ".trace",
229-
path: fmt.Sprintf("/trace?seconds=%d", seconds),
230-
},
231-
} {
232-
b, err := get(prof.path)
233-
if err != nil {
234-
return err
235-
}
236-
pdata[prof.name] = b
237-
}
238-
return nil
239-
}
240-
}
241-
242-
agentCollectors := map[string]agentProfileCollector{
243-
"core": serviceProfileCollector(tcpGet("expvar_port", false), seconds),
244-
"security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port", false), seconds),
245-
}
246-
247-
if pkgconfigsetup.Datadog().GetBool("process_config.enabled") ||
248-
pkgconfigsetup.Datadog().GetBool("process_config.container_collection.enabled") ||
249-
pkgconfigsetup.Datadog().GetBool("process_config.process_collection.enabled") {
250-
251-
agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port", false), seconds)
252-
}
253-
254-
if pkgconfigsetup.Datadog().GetBool("apm_config.enabled") {
255-
traceCpusec := pkgconfigsetup.Datadog().GetInt("apm_config.receiver_timeout")
256-
if traceCpusec > seconds {
257-
// do not exceed requested duration
258-
traceCpusec = seconds
259-
} else if traceCpusec <= 0 {
260-
// default to 4s as maximum connection timeout of trace-agent HTTP server is 5s by default
261-
traceCpusec = 4
262-
}
263-
264-
agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port", true), traceCpusec)
265-
}
266-
267-
if pkgconfigsetup.SystemProbe().GetBool("system_probe_config.enabled") {
268-
client := &http.Client{
269-
Transport: &http.Transport{
270-
DialContext: sysprobeclient.DialContextFunc(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket")),
271-
},
272-
}
273-
274-
sysProbeGet := func() pprofGetter {
275-
return func(path string) ([]byte, error) {
276-
var buf bytes.Buffer
277-
pprofURL := sysprobeclient.DebugURL("/pprof" + path)
278-
req, err := http.NewRequest(http.MethodGet, pprofURL, &buf)
279-
if err != nil {
280-
return nil, err
281-
}
282-
283-
res, err := client.Do(req)
284-
if err != nil {
285-
return nil, err
286-
}
287-
defer res.Body.Close()
288-
289-
return io.ReadAll(res.Body)
290-
}
291-
}
292-
293-
agentCollectors["system-probe"] = serviceProfileCollector(sysProbeGet(), seconds)
294-
}
295-
296-
var errs error
297-
for name, callback := range agentCollectors {
298-
if err := callback(name); err != nil {
299-
errs = multierror.Append(errs, fmt.Errorf("error collecting %s agent profile: %v", name, err))
300-
}
301-
}
302-
303-
return pdata, errs
304-
}
305-
306186
func makeFlare(flareComp flare.Component,
307187
lc log.Component,
308188
config config.Component,
309189
_ sysprobeconfig.Component,
310190
cliParams *cliParams,
311191
_ option.Option[workloadmeta.Component],
312-
_ tagger.Component) error {
192+
_ tagger.Component,
193+
flareprofiler flareprofilerdef.Component) error {
313194
var (
314-
profile flare.ProfileData
195+
profile flaretypes.ProfileData
315196
err error
316197
)
317198

@@ -359,8 +240,13 @@ func makeFlare(flareComp flare.Component,
359240
ProfileBlockingRate: cliParams.profileBlockingRate,
360241
}
361242

243+
logFunc := func(s string, params ...interface{}) error {
244+
fmt.Fprintln(color.Output, color.BlueString(s, params...))
245+
return nil
246+
}
247+
362248
err = settings.ExecWithRuntimeProfilingSettings(func() {
363-
if profile, err = readProfileData(cliParams.profiling); err != nil {
249+
if profile, err = flareprofiler.ReadProfileData(cliParams.profiling, logFunc); err != nil {
364250
fmt.Fprintln(color.Output, color.YellowString(fmt.Sprintf("Could not collect performance profile data: %s", err)))
365251
}
366252
}, profilingOpts, c)
@@ -414,7 +300,7 @@ func makeFlare(flareComp flare.Component,
414300
return nil
415301
}
416302

417-
func requestArchive(flareComp flare.Component, pdata flare.ProfileData, providerTimeout time.Duration) (string, error) {
303+
func requestArchive(flareComp flare.Component, pdata flaretypes.ProfileData, providerTimeout time.Duration) (string, error) {
418304
fmt.Fprintln(color.Output, color.BlueString("Asking the agent to build the flare archive."))
419305
c := util.GetClient(false) // FIX: get certificates right then make this true
420306
ipcAddress, err := pkgconfigsetup.GetIPCAddress(pkgconfigsetup.Datadog())
@@ -464,7 +350,7 @@ func requestArchive(flareComp flare.Component, pdata flare.ProfileData, provider
464350
return string(r), nil
465351
}
466352

467-
func createArchive(flareComp flare.Component, pdata flare.ProfileData, providerTimeout time.Duration, ipcError error) (string, error) {
353+
func createArchive(flareComp flare.Component, pdata flaretypes.ProfileData, providerTimeout time.Duration, ipcError error) (string, error) {
468354
fmt.Fprintln(color.Output, color.YellowString("Initiating flare locally."))
469355
filePath, err := flareComp.Create(pdata, providerTimeout, ipcError)
470356
if err != nil {

0 commit comments

Comments
 (0)