Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
cc42589
Add get_included_keywords method to CompiledRule trait
artslidd Aug 22, 2024
2303433
Add should_keywords_match_events_path feature to ScopedRuleSet struct
artslidd Aug 22, 2024
08544e3
Add find_true_positive_rules_from_current_path method to ContentVisit…
artslidd Aug 22, 2024
1e3e98a
Use it in the push_segment method
artslidd Aug 22, 2024
82191bd
Fix sanitize_segments
artslidd Aug 23, 2024
0e85e6e
Modify method signature
artslidd Aug 23, 2024
c1712f9
Remove dead code
artslidd Aug 23, 2024
310e49a
Remove unused imports
artslidd Aug 23, 2024
832b586
Cargo fmt
artslidd Aug 23, 2024
93c4b1f
Optimise sanitize_path creation
artslidd Aug 23, 2024
3c66fa7
Allow dead_code
artslidd Aug 23, 2024
234b30e
Put _ in front of unused arguments
artslidd Aug 23, 2024
a9ee282
Address clippy warnings
artslidd Aug 23, 2024
afe0aa7
Pop sanitized_segments_until_node when the segment is popped
artslidd Oct 14, 2024
3b7e4b4
Add get_included_keywords method to CompiledRule trait
artslidd Aug 22, 2024
630745b
Add should_keywords_match_events_path feature to ScopedRuleSet struct
artslidd Aug 22, 2024
d3c321a
Add find_true_positive_rules_from_current_path method to ContentVisit…
artslidd Aug 22, 2024
09d2060
Modify method signature
artslidd Aug 23, 2024
5ce5779
Remove unused imports
artslidd Aug 23, 2024
e3dc1d7
Cargo fmt
artslidd Aug 23, 2024
ebec67c
Use true_positive_rule_idx to decide if string is true positive or no…
artslidd Aug 23, 2024
b4ec306
Cargo fmt and re-add segment.pop()
artslidd Aug 23, 2024
08e9c77
Do not clone the Cow and instead return a Borrowed Cow
artslidd Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sds/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ pub fn scoped_ruleset(c: &mut Criterion) {
});
false
}

// Benchmark stub: this visitor never marks a rule as a true positive from the
// path, so it leaves the index vector untouched and reports that zero indices
// were added. The parameters are intentionally unused, hence the `_` prefix
// (avoids `unused_variables` warnings).
fn find_true_positive_rules_from_current_path(
    &self,
    _sanitized_path: &str,
    _current_true_positive_rule_idx: &mut Vec<usize>,
) -> usize {
    0
}
}

fast_rule_set.visit_string_rule_combinations(
Expand Down
4 changes: 2 additions & 2 deletions sds/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ impl<'a> PathSegment<'a> {
}
}

pub fn sanitize(&self) -> Option<Cow<'a, str>> {
pub fn sanitize(&'a self) -> Option<Cow<'a, str>> {
if let PathSegment::Field(field) = self {
match should_bypass_standardize_path(field) {
BypassStandardizePathResult::BypassAndAllLowercase => Some(field.clone()),
BypassStandardizePathResult::BypassAndAllLowercase => Some(Cow::Borrowed(field)),
BypassStandardizePathResult::BypassAndAllUppercase => {
Some(Cow::Owned(field.to_ascii_lowercase()))
}
Expand Down
61 changes: 45 additions & 16 deletions sds/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::any::{Any, TypeId};
use std::sync::Arc;

use self::metrics::ScannerMetrics;
use crate::proximity_keywords::{contains_keyword_in_path, CompiledIncludedProximityKeywords};
use crate::scanner::config::RuleConfig;
use crate::scanner::regex_rule::compiled::RegexCompiledRule;
use crate::scanner::regex_rule::{access_regex_caches, RegexCaches};
Expand Down Expand Up @@ -59,18 +60,18 @@ where
pub trait CompiledRuleDyn: Send + Sync {
fn get_match_action(&self) -> &MatchAction;
fn get_scope(&self) -> &Scope;
fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords>;

#[allow(clippy::too_many_arguments)]
fn get_string_matches(
&self,
content: &str,
path: &Path,
regex_caches: &mut RegexCaches,
group_data: &mut AHashMap<TypeId, Box<dyn Any>>,
exclusion_check: &ExclusionCheck<'_>,
excluded_matches: &mut AHashSet<String>,
match_emitter: &mut dyn MatchEmitter,
should_keywords_match_event_paths: bool,
true_positive_rule_idx: &Vec<usize>,
scanner_labels: &Labels,
);

Expand Down Expand Up @@ -102,16 +103,19 @@ impl<T: CompiledRule> CompiledRuleDyn for T {
self.get_scope()
}

fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords> {
self.get_included_keywords()
}

fn get_string_matches(
&self,
content: &str,
path: &Path,
regex_caches: &mut RegexCaches,
group_data: &mut AHashMap<TypeId, Box<dyn Any>>,
exclusion_check: &ExclusionCheck<'_>,
excluded_matches: &mut AHashSet<String>,
match_emitter: &mut dyn MatchEmitter,
should_keywords_match_event_paths: bool,
true_positive_rule_idx: &Vec<usize>,
scanner_labels: &Labels,
) {
let group_data_any = group_data
Expand All @@ -120,13 +124,12 @@ impl<T: CompiledRule> CompiledRuleDyn for T {
let group_data: &mut T::GroupData = group_data_any.downcast_mut().unwrap();
self.get_string_matches(
content,
path,
regex_caches,
group_data,
exclusion_check,
excluded_matches,
match_emitter,
should_keywords_match_event_paths,
true_positive_rule_idx,
)
}

Expand Down Expand Up @@ -158,18 +161,18 @@ pub trait CompiledRule: Send + Sync {

fn get_match_action(&self) -> &MatchAction;
fn get_scope(&self) -> &Scope;
fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords>;

#[allow(clippy::too_many_arguments)]
fn get_string_matches(
&self,
content: &str,
path: &Path,
regex_caches: &mut RegexCaches,
group_data: &mut Self::GroupData,
exclusion_check: &ExclusionCheck<'_>,
excluded_matches: &mut AHashSet<String>,
match_emitter: &mut dyn MatchEmitter,
should_keywords_match_event_paths: bool,
true_positive_rule_idx: &Vec<usize>,
);

// Whether a match from this rule should be excluded (marked as a false-positive)
Expand Down Expand Up @@ -615,7 +618,10 @@ impl ScannerBuilder<'_> {
.map(|rule| rule.get_scope().clone())
.collect::<Vec<_>>(),
)
.with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards);
.with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards)
.with_keywords_should_match_event_paths(
self.scanner_features.should_keywords_match_event_paths,
);

{
let stats = &*GLOBAL_STATS;
Expand Down Expand Up @@ -652,6 +658,7 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> {
content: &str,
mut rule_visitor: crate::scoped_ruleset::RuleIndexVisitor,
exclusion_check: ExclusionCheck<'b>,
true_positive_rule_idx: &Vec<usize>,
) -> bool {
// matches for a single path
let mut path_rules_matches = vec![];
Expand All @@ -678,15 +685,12 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> {

rule.get_string_matches(
content,
path,
self.regex_caches,
&mut self.regex_caches,
&mut group_data,
&exclusion_check,
self.excluded_matches,
&mut emitter,
self.scanner
.scanner_features
.should_keywords_match_event_paths,
true_positive_rule_idx,
&self.scanner.labels,
);
}
Expand Down Expand Up @@ -718,6 +722,26 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> {

has_match
}

/// Records every rule whose included keywords match `sanitized_path`.
///
/// Rules already listed in `current_true_positive_rule_idx` are skipped, as are
/// rules without included keywords. The index of each remaining rule whose
/// keywords pattern is found in the path is appended to the vector.
///
/// Returns the number of indices appended by this call.
fn find_true_positive_rules_from_current_path(
    &self,
    sanitized_path: &str,
    current_true_positive_rule_idx: &mut Vec<usize>,
) -> usize {
    // Only this function pushes below, so the growth of the vector is exactly
    // the number of rules newly flagged for this path.
    let len_before = current_true_positive_rule_idx.len();

    for (rule_idx, rule) in self.scanner.rules.iter().enumerate() {
        // Already flagged as a true positive: nothing more to do.
        if current_true_positive_rule_idx.contains(&rule_idx) {
            continue;
        }

        let keywords = match rule.get_included_keywords() {
            Some(keywords) => keywords,
            None => continue,
        };

        if contains_keyword_in_path(sanitized_path, &keywords.keywords_pattern) {
            // The rule is a true positive for this path, so remember its index.
            current_true_positive_rule_idx.push(rule_idx);
        }
    }

    current_true_positive_rule_idx.len() - len_before
}
}

// Calculates the next starting position for a regex match if the previous match is a false positive
Expand Down Expand Up @@ -801,17 +825,22 @@ mod test {
fn get_scope(&self) -> &Scope {
&self.scope
}

fn create_group_data(_: &Labels) {}

// This test rule always reports that it has no included keywords.
fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords> {
None
}

fn get_string_matches(
&self,
_content: &str,
_path: &Path,
_regex_caches: &mut RegexCaches,
_group_data: &mut Self::GroupData,
_exclusion_check: &ExclusionCheck<'_>,
_excluded_matches: &mut AHashSet<String>,
match_emitter: &mut dyn MatchEmitter,
_should_keywords_match_event_paths: bool,
_true_positive_rule_idx: &Vec<usize>,
) {
match_emitter.emit(StringMatch { start: 10, end: 16 });
}
Expand Down
48 changes: 23 additions & 25 deletions sds/src/scanner/regex_rule/compiled.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
use crate::match_validation::config::{InternalMatchValidationType, MatchValidationType};
use crate::proximity_keywords::{
contains_keyword_in_path, get_prefix_start, is_index_within_prefix,
CompiledExcludedProximityKeywords, CompiledIncludedProximityKeywords,
get_prefix_start, is_index_within_prefix, CompiledExcludedProximityKeywords,
CompiledIncludedProximityKeywords,
};
use crate::scanner::metrics::RuleMetrics;
use crate::scanner::regex_rule::regex_store::SharedRegex;
use crate::scanner::regex_rule::RegexCaches;
use crate::scanner::scope::Scope;
use crate::scanner::{get_next_regex_start, is_false_positive_match};
use crate::secondary_validation::Validator;
use crate::{CompiledRule, ExclusionCheck, Labels, MatchAction, MatchEmitter, Path, StringMatch};
use crate::{CompiledRule, ExclusionCheck, Labels, MatchAction, MatchEmitter, StringMatch};
use ahash::AHashSet;
use regex_automata::meta::Cache;
use regex_automata::Input;
Expand Down Expand Up @@ -40,27 +40,29 @@ impl CompiledRule for RegexCompiledRule {
&self.scope
}
fn create_group_data(_: &Labels) {}
// Borrows this rule's `included_keywords` field, yielding `None` when the
// field itself is `None`.
fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords> {
self.included_keywords.as_ref()
}

fn get_string_matches(
&self,
content: &str,
path: &Path,
regex_caches: &mut RegexCaches,
_group_data: &mut (),
exclusion_check: &ExclusionCheck<'_>,
excluded_matches: &mut AHashSet<String>,
match_emitter: &mut dyn MatchEmitter,
should_keywords_match_event_paths: bool,
true_positive_rule_idx: &Vec<usize>,
) {
match self.included_keywords {
Some(ref included_keywords) => {
self.get_string_matches_with_included_keywords(
content,
path,
regex_caches,
exclusion_check,
excluded_matches,
match_emitter,
should_keywords_match_event_paths,
true_positive_rule_idx,
included_keywords,
);
}
Expand Down Expand Up @@ -108,31 +110,27 @@ impl RegexCompiledRule {
fn get_string_matches_with_included_keywords(
&self,
content: &str,
path: &Path,
regex_caches: &mut RegexCaches,
exclusion_check: &ExclusionCheck<'_>,
excluded_matches: &mut AHashSet<String>,
match_emitter: &mut dyn MatchEmitter,
should_keywords_match_event_paths: bool,
true_positive_rule_idx: &Vec<usize>,
included_keywords: &CompiledIncludedProximityKeywords,
) {
if should_keywords_match_event_paths {
let sanitized_path = path.sanitize();
if contains_keyword_in_path(&sanitized_path, &included_keywords.keywords_pattern) {
// since the path contains a match, we can skip future included keyword checks
let true_positive_search = self.true_positive_matches(
content,
0,
regex_caches.get(&self.regex),
false,
exclusion_check,
excluded_matches,
);
for string_match in true_positive_search {
match_emitter.emit(string_match);
}
return;
if !true_positive_rule_idx.is_empty() && true_positive_rule_idx.contains(&self.rule_index) {
// since the path contains a match, we can skip future included keyword checks
let true_positive_search = self.true_positive_matches(
content,
0,
regex_caches.get(&self.regex),
false,
exclusion_check,
excluded_matches,
);
for string_match in true_positive_search {
match_emitter.emit(string_match);
}
return;
}

let mut included_keyword_matches = included_keywords.keyword_matches(content);
Expand Down
Loading