Skip to content

Commit 8b77ede

Browse files
kylos101 and ona-agent authored
[agent-smith] introduce file detector (#21010)
* feat(agent-smith): implement filesystem signature scanning
  - Add filesystem scanning capability to detect suspicious files in workspaces
  - Scan workspace directories directly from WorkingArea/{InstanceID} paths
  - Support filesystem signatures with filename patterns and regex matching
  - Add FilesystemScanning configuration with WorkingArea path
  - Integrate filesystem detection with existing signature classifier
  - Fix regex pattern matching in signature matching logic
  - Add comprehensive filesystem scanning tests
  - Update example configuration with filesystem signatures
  Co-authored-by: Ona <[email protected]>
* cleanup
* Use a separate func for matching for filesystem signatures
* Fix logging for successful match
* Simplify & no metrics
  Co-authored-by: Ona <[email protected]>
* Don't get fooled by the match
* Revert "Don't get fooled by the match"
  This reverts commit 124b7ac.
  Co-authored-by: Ona <[email protected]>
* Cleanup
  Co-authored-by: Ona <[email protected]>
* More cleanup
* Renaming and metric removal
* Fix build
---------
Co-authored-by: Ona <[email protected]>
1 parent 0d84199 commit 8b77ede

File tree

13 files changed

+1447
-37
lines changed

13 files changed

+1447
-37
lines changed

components/ee/agent-smith/example-config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,11 @@
1313
}
1414
]
1515
}
16+
},
17+
"filesystemScanning": {
18+
"enabled": true,
19+
"scanInterval": "5m",
20+
"maxFileSize": 1024,
21+
"workingArea": "/mnt/workingarea-mk2"
1622
}
1723
}

components/ee/agent-smith/pkg/agent/agent.go

Lines changed: 105 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,10 @@ type Smith struct {
5151
timeElapsedHandler func(t time.Time) time.Duration
5252
notifiedInfringements *lru.Cache
5353

54-
detector detector.ProcessDetector
55-
classifier classifier.ProcessClassifier
54+
detector detector.ProcessDetector
55+
classifier classifier.ProcessClassifier
56+
fileDetector detector.FileDetector
57+
fileClassifier classifier.FileClassifier
5658
}
5759

5860
// NewAgentSmith creates a new agent smith
@@ -135,6 +137,32 @@ func NewAgentSmith(cfg config.Config) (*Smith, error) {
135137
return nil, err
136138
}
137139

140+
// Initialize filesystem detection if enabled
141+
var filesystemDetec detector.FileDetector
142+
var filesystemClass classifier.FileClassifier
143+
if cfg.FilesystemScanning != nil && cfg.FilesystemScanning.Enabled {
144+
// Create filesystem detector config
145+
fsConfig := detector.FileScanningConfig{
146+
Enabled: cfg.FilesystemScanning.Enabled,
147+
ScanInterval: cfg.FilesystemScanning.ScanInterval.Duration,
148+
MaxFileSize: cfg.FilesystemScanning.MaxFileSize,
149+
WorkingArea: cfg.FilesystemScanning.WorkingArea,
150+
}
151+
152+
// Create independent filesystem classifier (no dependency on process classifier)
153+
filesystemClass, err = cfg.Blocklists.FileClassifier()
154+
if err != nil {
155+
log.WithError(err).Error("failed to create filesystem classifier")
156+
} else {
157+
filesystemDetec, err = detector.NewfileDetector(fsConfig, filesystemClass)
158+
if err != nil {
159+
log.WithError(err).Error("failed to create filesystem detector")
160+
} else {
161+
log.Info("Filesystem detector created successfully with independent classifier")
162+
}
163+
}
164+
}
165+
138166
m := newAgentMetrics()
139167
res := &Smith{
140168
EnforcementRules: map[string]config.EnforcementRules{
@@ -150,8 +178,10 @@ func NewAgentSmith(cfg config.Config) (*Smith, error) {
150178

151179
wsman: wsman,
152180

153-
detector: detec,
154-
classifier: class,
181+
detector: detec,
182+
classifier: class,
183+
fileDetector: filesystemDetec,
184+
fileClassifier: filesystemClass,
155185

156186
notifiedInfringements: lru.New(notificationCacheSize),
157187
metrics: m,
@@ -227,17 +257,34 @@ type classifiedProcess struct {
227257
Err error
228258
}
229259

260+
type classifiedFile struct {
261+
F detector.File
262+
C *classifier.Classification
263+
Err error
264+
}
265+
230266
// Start gets a stream of Infringements from Run and executes a callback on them to apply a Penalty
231267
func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace, []config.PenaltyKind)) {
232268
ps, err := agent.detector.DiscoverProcesses(ctx)
233269
if err != nil {
234270
log.WithError(err).Fatal("cannot start process detector")
235271
}
236272

273+
// Start filesystem detection if enabled
274+
var fs <-chan detector.File
275+
if agent.fileDetector != nil {
276+
fs, err = agent.fileDetector.DiscoverFiles(ctx)
277+
if err != nil {
278+
log.WithError(err).Warn("cannot start filesystem detector")
279+
}
280+
}
281+
237282
var (
238283
wg sync.WaitGroup
239284
cli = make(chan detector.Process, 500)
240285
clo = make(chan classifiedProcess, 50)
286+
fli = make(chan detector.File, 100)
287+
flo = make(chan classifiedFile, 25)
241288
)
242289
agent.metrics.RegisterClassificationQueues(cli, clo)
243290

@@ -268,6 +315,25 @@ func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace
268315
}()
269316
}
270317

318+
// Filesystem classification workers (fewer than process workers)
319+
if agent.fileClassifier != nil {
320+
for i := 0; i < 5; i++ {
321+
wg.Add(1)
322+
go func() {
323+
defer wg.Done()
324+
for file := range fli {
325+
class, err := agent.fileClassifier.MatchesFile(file.Path)
326+
if err == nil && class.Level == classifier.LevelNoMatch {
327+
log.Infof("File classification: no match - %s", file.Path)
328+
continue
329+
}
330+
log.Infof("File classification result: %s (level: %s, err: %v)", file.Path, class.Level, err)
331+
flo <- classifiedFile{F: file, C: class, Err: err}
332+
}
333+
}()
334+
}
335+
}
336+
271337
defer log.Info("agent smith main loop ended")
272338

273339
// We want to fill the classifier in a goroutine separate from using the classification
@@ -288,6 +354,15 @@ func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace
288354
// we're overfilling the classifier worker
289355
agent.metrics.classificationBackpressureInDrop.Inc()
290356
}
357+
case file, ok := <-fs:
358+
if !ok {
359+
continue
360+
}
361+
select {
362+
case fli <- file:
363+
default:
364+
// filesystem queue full, skip this file
365+
}
291366
}
292367
}
293368
}()
@@ -319,6 +394,32 @@ func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace
319394
},
320395
},
321396
})
397+
case fileClass := <-flo:
398+
log.Infof("Received classified file from flo channel")
399+
file, cl, err := fileClass.F, fileClass.C, fileClass.Err
400+
if err != nil {
401+
log.WithError(err).WithFields(log.OWI(file.Workspace.OwnerID, file.Workspace.WorkspaceID, file.Workspace.InstanceID)).WithField("path", file.Path).Error("cannot classify filesystem file")
402+
continue
403+
}
404+
405+
log.WithField("path", file.Path).WithField("severity", cl.Level).WithField("message", cl.Message).
406+
WithFields(log.OWI(file.Workspace.OwnerID, file.Workspace.WorkspaceID, file.Workspace.InstanceID)).
407+
Info("filesystem signature detected")
408+
409+
_, _ = agent.Penalize(InfringingWorkspace{
410+
SupervisorPID: file.Workspace.PID,
411+
Owner: file.Workspace.OwnerID,
412+
InstanceID: file.Workspace.InstanceID,
413+
WorkspaceID: file.Workspace.WorkspaceID,
414+
GitRemoteURL: []string{file.Workspace.GitURL},
415+
Infringements: []Infringement{
416+
{
417+
Kind: config.GradeKind(config.InfringementExec, common.Severity(cl.Level)), // Reuse exec for now
418+
Description: fmt.Sprintf("filesystem signature: %s", cl.Message),
419+
CommandLine: []string{file.Path}, // Use file path as "command"
420+
},
421+
},
422+
})
322423
}
323424
}
324425
}

components/ee/agent-smith/pkg/classifier/classifier.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ type ProcessClassifier interface {
4848
Matches(executable string, cmdline []string) (*Classification, error)
4949
}
5050

51+
// FileClassifier matches filesystem files against signatures
52+
type FileClassifier interface {
53+
MatchesFile(filePath string) (*Classification, error)
54+
GetFileSignatures() []*Signature
55+
}
56+
5157
func NewCommandlineClassifier(name string, level Level, allowList []string, blockList []string) (*CommandlineClassifier, error) {
5258
al := make([]*regexp.Regexp, 0, len(allowList))
5359
for _, a := range allowList {
@@ -173,6 +179,7 @@ type SignatureMatchClassifier struct {
173179
}
174180

175181
var _ ProcessClassifier = &SignatureMatchClassifier{}
182+
var _ FileClassifier = &SignatureMatchClassifier{}
176183

177184
var sigNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierSignature}
178185

@@ -223,6 +230,63 @@ func (sigcl *SignatureMatchClassifier) Matches(executable string, cmdline []stri
223230
return sigNoMatch, nil
224231
}
225232

233+
// MatchesFile checks if a filesystem file matches any filesystem signatures
234+
func (sigcl *SignatureMatchClassifier) MatchesFile(filePath string) (c *Classification, err error) {
235+
filesystemSignatures := sigcl.GetFileSignatures()
236+
237+
if len(filesystemSignatures) == 0 {
238+
return sigNoMatch, nil
239+
}
240+
241+
// Skip filename matching - the filesystem detector already filtered files
242+
// based on signature filename patterns, so any file that reaches here
243+
// should be checked for content matching against all filesystem signatures
244+
matchingSignatures := filesystemSignatures
245+
246+
// Open file for signature matching
247+
r, err := os.Open(filePath)
248+
if err != nil {
249+
var reason string
250+
if errors.Is(err, fs.ErrNotExist) {
251+
reason = processMissNotFound
252+
} else if errors.Is(err, os.ErrPermission) {
253+
reason = processMissPermissionDenied
254+
} else {
255+
reason = processMissOther
256+
}
257+
log.WithFields(logrus.Fields{
258+
"filePath": filePath,
259+
"reason": reason,
260+
}).WithError(err).Debug("filesystem signature classification miss")
261+
return sigNoMatch, nil
262+
}
263+
defer r.Close()
264+
265+
var serr error
266+
267+
src := SignatureReadCache{
268+
Reader: r,
269+
}
270+
for _, sig := range matchingSignatures {
271+
match, err := sig.Matches(&src)
272+
if match {
273+
return &Classification{
274+
Level: sigcl.DefaultLevel,
275+
Classifier: ClassifierSignature,
276+
Message: fmt.Sprintf("filesystem signature matches %s", sig.Name),
277+
}, nil
278+
}
279+
if err != nil {
280+
serr = err
281+
}
282+
}
283+
if serr != nil {
284+
return nil, serr
285+
}
286+
287+
return sigNoMatch, nil
288+
}
289+
226290
type SignatureReadCache struct {
227291
Reader io.ReaderAt
228292
header []byte
@@ -240,6 +304,17 @@ func (sigcl *SignatureMatchClassifier) Collect(m chan<- prometheus.Metric) {
240304
sigcl.signatureHitTotal.Collect(m)
241305
}
242306

307+
// GetFileSignatures returns signatures that are configured for filesystem domain
308+
func (sigcl *SignatureMatchClassifier) GetFileSignatures() []*Signature {
309+
var filesystemSignatures []*Signature
310+
for _, sig := range sigcl.Signatures {
311+
if sig.Domain == DomainFileSystem {
312+
filesystemSignatures = append(filesystemSignatures, sig)
313+
}
314+
}
315+
return filesystemSignatures
316+
}
317+
243318
// CompositeClassifier combines multiple classifiers into one. The first match wins.
244319
type CompositeClassifier []ProcessClassifier
245320

0 commit comments

Comments
 (0)