Skip to content

Commit f06656f

Browse files
authored
[CWS] Fix flaky tests and remove unneeded code (#45508)
### What does this PR do? This PR: * Fixes the TestActionKillContainerWithSignature flakiness * Removes auto-suppression logic from dumps and profiles and related tests * Removes load controller logic and tests (keeping only the size checks of dumps) ### Motivation ### Describe how you validated your changes ### Additional Notes Co-authored-by: jonathan.ribas <jonathan.ribas@datadoghq.com>
1 parent 747e97f commit f06656f

File tree

16 files changed

+19
-1054
lines changed

16 files changed

+19
-1054
lines changed

pkg/security/config/config.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,6 @@ type RuntimeSecurityConfig struct {
278278
ActivityDumpSilentWorkloadsDelay time.Duration
279279
// ActivityDumpSilentWorkloadsTicker configures ticker that will check if a workload is silent and should be traced
280280
ActivityDumpSilentWorkloadsTicker time.Duration
281-
// ActivityDumpAutoSuppressionEnabled bool do not send event if part of a dump
282-
ActivityDumpAutoSuppressionEnabled bool
283281

284282
// # Dynamic configuration fields:
285283
// ActivityDumpMaxDumpSize defines the maximum size of a dump
@@ -302,11 +300,6 @@ type RuntimeSecurityConfig struct {
302300
// SecurityProfileNodeEvictionTimeout defines the timeout after which non-touched nodes are evicted from profiles
303301
SecurityProfileNodeEvictionTimeout time.Duration
304302

305-
// SecurityProfileAutoSuppressionEnabled do not send event if part of a profile
306-
SecurityProfileAutoSuppressionEnabled bool
307-
// SecurityProfileAutoSuppressionEventTypes defines the list of event types the can be auto suppressed using security profiles
308-
SecurityProfileAutoSuppressionEventTypes []model.EventType
309-
310303
// AnomalyDetectionEventTypes defines the list of events that should be allowed to generate anomaly detections
311304
AnomalyDetectionEventTypes []model.EventType
312305
// AnomalyDetectionDefaultMinimumStablePeriod defines the default minimum amount of time during which the events
@@ -566,7 +559,6 @@ func NewRuntimeSecurityConfig() (*RuntimeSecurityConfig, error) {
566559
ActivityDumpSilentWorkloadsDelay: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.activity_dump.silent_workloads.delay"),
567560
ActivityDumpSilentWorkloadsTicker: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.activity_dump.silent_workloads.ticker"),
568561
ActivityDumpWorkloadDenyList: pkgconfigsetup.SystemProbe().GetStringSlice("runtime_security_config.activity_dump.workload_deny_list"),
569-
ActivityDumpAutoSuppressionEnabled: pkgconfigsetup.SystemProbe().GetBool("runtime_security_config.activity_dump.auto_suppression.enabled"),
570562
// activity dump dynamic fields
571563
ActivityDumpMaxDumpSize: func() int {
572564
mds := max(pkgconfigsetup.SystemProbe().GetInt("runtime_security_config.activity_dump.max_dump_size"), ADMinMaxDumSize)
@@ -605,10 +597,6 @@ func NewRuntimeSecurityConfig() (*RuntimeSecurityConfig, error) {
605597
SecurityProfileDNSMatchMaxDepth: pkgconfigsetup.SystemProbe().GetInt("runtime_security_config.security_profile.dns_match_max_depth"),
606598
SecurityProfileNodeEvictionTimeout: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.security_profile.node_eviction_timeout"),
607599

608-
// auto suppression
609-
SecurityProfileAutoSuppressionEnabled: pkgconfigsetup.SystemProbe().GetBool("runtime_security_config.security_profile.auto_suppression.enabled"),
610-
SecurityProfileAutoSuppressionEventTypes: parseEventTypeStringSlice(pkgconfigsetup.SystemProbe().GetStringSlice("runtime_security_config.security_profile.auto_suppression.event_types")),
611-
612600
// anomaly detection
613601
AnomalyDetectionEventTypes: parseEventTypeStringSlice(pkgconfigsetup.SystemProbe().GetStringSlice("runtime_security_config.security_profile.anomaly_detection.event_types")),
614602
AnomalyDetectionDefaultMinimumStablePeriod: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.security_profile.anomaly_detection.default_minimum_stable_period"),

pkg/security/metrics/metrics.go

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,6 @@ var (
3333
// Tags: rule_id
3434
MetricRateLimiterAllow = newRuntimeMetric(".rules.rate_limiter.allow")
3535

36-
// Rule Suppression metrics
37-
38-
// MetricRulesSuppressed is the name of the metric used to count the number of auto suppressed events
39-
// Tags: rule_id
40-
MetricRulesSuppressed = newRuntimeMetric(".rules.suppressed")
41-
4236
// MetricRulesNoMatch is the number of events that reached userspace but didn't match any rule
4337
// Tags: event_type, category
4438
MetricRulesNoMatch = newRuntimeMetric(".rules.no_match")
@@ -242,9 +236,6 @@ var (
242236
// MetricActivityDumpActiveDumps is the name of the metric used to report the number of active dumps
243237
// Tags: -
244238
MetricActivityDumpActiveDumps = newRuntimeMetric(".activity_dump.active_dumps")
245-
// MetricActivityDumpLoadControllerTriggered is the name of the metric used to report that the ADM load controller reduced the config envelope
246-
// Tags:reduction, event_type
247-
MetricActivityDumpLoadControllerTriggered = newRuntimeMetric(".activity_dump.load_controller_triggered")
248239
// MetricActivityDumpActiveDumpSizeInMemory is the size of an activity dump in memory
249240
// Tags: dump_index
250241
MetricActivityDumpActiveDumpSizeInMemory = newRuntimeMetric(".activity_dump.size_in_memory")
@@ -320,7 +311,7 @@ var (
320311
// Security Profile metrics
321312

322313
// MetricSecurityProfileProfiles is the name of the metric used to report the count of Security Profiles per category
323-
// Tags: in_kernel (true or false), anomaly_detection (true or false), auto_suppression (true or false), workload_hardening (true or false)
314+
// Tags: in_kernel (true or false), anomaly_detection (true or false), workload_hardening (true or false)
324315
MetricSecurityProfileProfiles = newRuntimeMetric(".security_profile.profiles")
325316
// MetricSecurityProfileCacheLen is the name of the metric used to report the size of the Security Profile cache
326317
// Tags: -

pkg/security/module/cws.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -389,16 +389,6 @@ func (c *CWSConsumer) sendStats() {
389389
}
390390

391391
c.ruleEngine.SendStats()
392-
393-
for statsTags, counter := range c.ruleEngine.AutoSuppression.GetStats() {
394-
if counter > 0 {
395-
tags := []string{
396-
"rule_id:" + statsTags.RuleID,
397-
"suppression_type:" + statsTags.SuppressionType,
398-
}
399-
_ = c.statsdClient.Count(metrics.MetricRulesSuppressed, counter, tags, 1.0)
400-
}
401-
}
402392
}
403393

404394
func (c *CWSConsumer) statsSender() {

pkg/security/probe/probe_ebpf.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,6 @@ func (p *EBPFProbe) DispatchEvent(event *model.Event, notifyConsumers bool) {
897897
p.profileManager.LookupEventInProfiles(event)
898898

899899
// mark the events that have an associated activity dump
900-
// this is needed for auto suppressions performed by the CWS rule engine
901900
if p.profileManager.HasActiveActivityDump(event) {
902901
event.AddToFlags(model.EventFlagsHasActiveActivityDump)
903902
}

pkg/security/rules/autosuppression/autosuppression.go

Lines changed: 0 additions & 144 deletions
This file was deleted.

pkg/security/rules/engine.go

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"github.com/DataDog/datadog-agent/pkg/security/probe"
3030
"github.com/DataDog/datadog-agent/pkg/security/proto/api"
3131
"github.com/DataDog/datadog-agent/pkg/security/rconfig"
32-
"github.com/DataDog/datadog-agent/pkg/security/rules/autosuppression"
3332
"github.com/DataDog/datadog-agent/pkg/security/rules/bundled"
3433
"github.com/DataDog/datadog-agent/pkg/security/rules/filtermodel"
3534
"github.com/DataDog/datadog-agent/pkg/security/rules/monitor"
@@ -65,7 +64,6 @@ type RuleEngine struct {
6564
statsdClient statsd.ClientInterface
6665
eventSender events.EventSender
6766
rulesetListeners []rules.RuleSetListener
68-
AutoSuppression autosuppression.AutoSuppression
6967
pid uint32
7068
wg sync.WaitGroup
7169
ipc ipc.Component
@@ -103,14 +101,6 @@ func NewRuleEngine(evm *eventmonitor.EventMonitor, config *config.RuntimeSecurit
103101

104102
engine.noMatchCounters = make([]atomic.Uint64, model.MaxAllEventType)
105103

106-
engine.AutoSuppression.Init(autosuppression.Opts{
107-
SecurityProfileEnabled: config.SecurityProfileEnabled,
108-
SecurityProfileAutoSuppressionEnabled: config.SecurityProfileAutoSuppressionEnabled,
109-
ActivityDumpEnabled: config.ActivityDumpEnabled,
110-
ActivityDumpAutoSuppressionEnabled: config.ActivityDumpAutoSuppressionEnabled,
111-
EventTypes: config.SecurityProfileAutoSuppressionEventTypes,
112-
})
113-
114104
// register as event handler
115105
if err := probe.AddEventHandler(engine); err != nil {
116106
return nil, err
@@ -409,9 +399,6 @@ func (e *RuleEngine) LoadPolicies(providers []rules.PolicyProvider, sendLoadedRe
409399
// set the rate limiters on sending events to the backend
410400
e.rateLimiter.Apply(rs, events.AllCustomRuleIDs())
411401

412-
// update the stats of auto-suppression rules
413-
e.AutoSuppression.Apply(rs)
414-
415402
if replayEvents {
416403
e.probe.ReplayEvents()
417404
}
@@ -544,15 +531,11 @@ func (e *RuleEngine) EventDiscarderFound(rs *rules.RuleSet, event eval.Event, fi
544531
func (e *RuleEngine) RuleMatch(ctx *eval.Context, rule *rules.Rule, event eval.Event) bool {
545532
ev := event.(*model.Event)
546533

547-
// add matched rules before any auto suppression check to ensure that this information is available in activity dumps
534+
// add matched rules to ensure that this information is available in activity dumps
548535
if ev.ProcessContext.Process.ContainerContext.ContainerID != "" && (e.config.ActivityDumpTagRulesEnabled || e.config.AnomalyDetectionTagRulesEnabled) {
549536
ev.Rules = append(ev.Rules, model.NewMatchedRule(rule.Def.ID, rule.Def.Version, rule.Def.Tags, rule.Policy.Name, rule.Policy.Version))
550537
}
551538

552-
if e.AutoSuppression.Suppresses(rule, ev) {
553-
return false
554-
}
555-
556539
e.probe.HandleActions(rule, event)
557540

558541
if rule.Def.Silent {

pkg/security/security_profile/load_controller.go

Lines changed: 8 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -51,71 +51,23 @@ func (m *Manager) getDefaultLoadConfig() *model.ActivityDumpLoadConfig {
5151
return m.activityDumpLoadConfig
5252
}
5353

54-
func (m *Manager) sendLoadControllerTriggeredMetric(tags []string) error {
55-
if err := m.statsdClient.Count(metrics.MetricActivityDumpLoadControllerTriggered, 1, tags, 1.0); err != nil {
56-
return fmt.Errorf("couldn't send %s metric: %v", metrics.MetricActivityDumpLoadControllerTriggered, err)
57-
}
58-
return nil
59-
}
60-
6154
func (m *Manager) nextPartialDump(prev *dump.ActivityDump) *dump.ActivityDump {
6255
previousLoadConfig := prev.LoadConfig.Load()
63-
timeToThreshold := time.Since(prev.Profile.Metadata.Start)
64-
65-
newRate := previousLoadConfig.Rate
66-
if timeToThreshold < m.minDumpTimeout {
67-
newRate = previousLoadConfig.Rate * 3 / 4 // reduce by 25%
68-
if err := m.sendLoadControllerTriggeredMetric([]string{"reduction:rate"}); err != nil {
69-
seclog.Errorf("%v", err)
70-
}
71-
}
72-
73-
newTimeout := previousLoadConfig.Timeout
74-
if timeToThreshold < m.minDumpTimeout/2 && previousLoadConfig.Timeout > m.minDumpTimeout {
75-
newTimeout = previousLoadConfig.Timeout * 3 / 4 // reduce by 25%
76-
if newTimeout < m.minDumpTimeout {
77-
newTimeout = m.minDumpTimeout
78-
}
79-
if err := m.sendLoadControllerTriggeredMetric([]string{"reduction:dump_timeout"}); err != nil {
80-
seclog.Errorf("%v", err)
81-
}
82-
}
83-
84-
newEvents := make([]model.EventType, len(previousLoadConfig.TracedEventTypes))
85-
copy(newEvents, previousLoadConfig.TracedEventTypes)
86-
if timeToThreshold < m.minDumpTimeout/4 {
87-
var evtToRemove model.EventType
88-
newEvents = newEvents[:0]
89-
reductionOrder:
90-
for _, evt := range TracedEventTypesReductionOrder {
91-
for _, tracedEvt := range previousLoadConfig.TracedEventTypes {
92-
if evt == tracedEvt {
93-
evtToRemove = evt
94-
break reductionOrder
95-
}
96-
}
97-
}
98-
for _, evt := range previousLoadConfig.TracedEventTypes {
99-
if evt != evtToRemove {
100-
newEvents = append(newEvents, evt)
101-
}
102-
}
103-
104-
if evtToRemove != model.UnknownEventType {
105-
if err := m.sendLoadControllerTriggeredMetric([]string{"reduction:traced_event_types", "event_type:" + evtToRemove.String()}); err != nil {
106-
seclog.Errorf("%v", err)
107-
}
108-
}
109-
}
11056

11157
now := time.Now()
112-
newLoadConfig := m.newActivityDumpLoadConfig(newEvents, newTimeout, m.config.RuntimeSecurity.ActivityDumpCgroupWaitListTimeout, newRate, now)
58+
newLoadConfig := m.newActivityDumpLoadConfig(
59+
previousLoadConfig.TracedEventTypes,
60+
previousLoadConfig.Timeout,
61+
m.config.RuntimeSecurity.ActivityDumpCgroupWaitListTimeout,
62+
previousLoadConfig.Rate,
63+
now,
64+
)
11365
newDump := dump.NewActivityDump(m.pathsReducer, prev.Profile.Metadata.DifferentiateArgs, 0, m.config.RuntimeSecurity.ActivityDumpTracedEventTypes, m.updateTracedPid, newLoadConfig, func(ad *dump.ActivityDump) {
11466
ad.Profile.Header = prev.Profile.Header
11567
ad.Profile.Metadata = prev.Profile.Metadata
11668
ad.Profile.Metadata.Name = "activity-dump-" + utils.RandString(10)
11769
ad.Profile.Metadata.Start = now
118-
ad.Profile.Metadata.End = now.Add(newTimeout)
70+
ad.Profile.Metadata.End = now.Add(previousLoadConfig.Timeout)
11971
ad.Profile.AddTags(prev.Profile.GetTags())
12072
})
12173

pkg/security/security_profile/manager.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,6 @@ func NewManager(cfg *config.Config, statsdClient statsd.ClientInterface, ebpf *e
270270
}
271271

272272
var secProfEventTypes []model.EventType
273-
if cfg.RuntimeSecurity.SecurityProfileAutoSuppressionEnabled {
274-
secProfEventTypes = append(secProfEventTypes, cfg.RuntimeSecurity.SecurityProfileAutoSuppressionEventTypes...)
275-
}
276273
if cfg.RuntimeSecurity.AnomalyDetectionEnabled {
277274
secProfEventTypes = append(secProfEventTypes, cfg.RuntimeSecurity.AnomalyDetectionEventTypes...)
278275
}

0 commit comments

Comments
 (0)