diff --git a/README.md b/README.md index eea6ad613..56feb3e8d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The Coraza Project maintains implementations and plugins for the following serve ## Prerequisites -* Go v1.22+ or tinygo compiler +* Recent Go version (see [go.mod](./go.mod)) or tinygo compiler. * Linux distribution (Debian or Centos recommended), Windows or Mac. ## Coraza Core Usage @@ -107,8 +107,9 @@ live reloads, use `WAF.Close()` (via `experimental.WAFCloser`) to release cached WAF is destroyed, or use this tag to opt out of memoization entirely. * `no_fs_access` - indicates that the target environment has no access to FS in order to not leverage OS' filesystem related functionality e.g. file body buffers. * `coraza.rule.case_sensitive_args_keys` - enables case-sensitive matching for ARGS keys, aligning Coraza behavior with RFC 3986 specification. It will be enabled by default in the next major version. -* `coraza.rule.no_regex_multiline` - disables enabling by default regexes multiline modifiers in `@rx` operator. It aligns with CRS expected behavior, reduces false positives and might improve performances. No multiline regexes by default will be enabled in the next major version. For more context check [this PR](https://github.com/corazawaf/coraza/pull/876) +* `coraza.rule.no_regex_multiline` - disables enabling by default regexes multiline modifiers in `@rx` operator. It aligns with CRS expected behavior, reduces false positives and might improve performances. No multiline regexes by default will be enabled in the next major version. For more context check [this PR](https://github.com/corazawaf/coraza/pull/876). * `coraza.rule.mandatory_rule_id_check` - enables strict rule id check where `id` action is required for all SecRule/SecAction. +* `coraza.rule.rx_prefilter` - sets the default value of the `SecRxPreFilter` directive to `On`. Optimizes `@rx` operator, by skipping the full regex when an input can not match. 
The build tag is meant for testing; rely on the `SecRxPreFilter` directive for runtime configuration and broader documentation. ## E2E Testing @@ -180,7 +181,7 @@ Our vulnerability management team will respond within 3 working days of your rep ## Donations -For donations, see [Donations site](https://owasp.org/donate/?reponame=www-project-coraza-web-application-firewall&title=OWASP+Coraza+Web+Application+Firewall) +For donations, see [Donations site](https://owasp.org/donate/?reponame=www-project-coraza-web-application-firewall&title=OWASP+Coraza+Web+Application+Firewall). ## Thanks to all the people who have contributed diff --git a/coraza.conf-recommended b/coraza.conf-recommended index 40fd4659c..4830157a4 100644 --- a/coraza.conf-recommended +++ b/coraza.conf-recommended @@ -8,6 +8,15 @@ SecRuleEngine DetectionOnly +# -- Performance optimizations ----------------------------------------------- + +# Enable compile-time literal pre-filtering for the @rx operator. +# When enabled, Coraza analyses each regex pattern at rule-load time and +# builds pre-checks that will be executed before the full regex evaluation, +# so that unnecessary regex evaluations can be skipped. +# +SecRxPreFilter Off + # -- Request body handling --------------------------------------------------- # Allows Coraza to access request bodies. Without this, Coraza diff --git a/experimental/plugins/plugintypes/operator.go b/experimental/plugins/plugintypes/operator.go index 0aa598d46..cd74022c9 100644 --- a/experimental/plugins/plugintypes/operator.go +++ b/experimental/plugins/plugintypes/operator.go @@ -13,10 +13,10 @@ type Memoizer interface { // OperatorOptions is used to store the options for a rule operator type OperatorOptions struct { - // Arguments is used to store the operator args + // Arguments stores the operator args. Arguments string - // Path is used to store a list of possible data paths + // Path stores a list of possible data paths. 
Path []string // Root is the root to resolve Path from. @@ -27,6 +27,10 @@ type OperatorOptions struct { // Memoizer caches expensive compilations (regex, aho-corasick). Memoizer Memoizer + + // RxPreFilterEnabled controls whether the @rx operator's compile-time + // literal pre-filtering is enabled. + RxPreFilterEnabled bool } // Operator interface is used to define rule @operators diff --git a/internal/corazawaf/rxprefilter_default.go b/internal/corazawaf/rxprefilter_default.go new file mode 100644 index 000000000..d460c794e --- /dev/null +++ b/internal/corazawaf/rxprefilter_default.go @@ -0,0 +1,11 @@ +// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !coraza.rule.rx_prefilter + +package corazawaf + +// defaultRxPreFilterEnabled is false when the coraza.rule.rx_prefilter build tag is not set. +// The prefilter code is always compiled; the tag only flips this default so that the whole +// test suite can run with the feature enabled. Use SecRxPreFilter to set it per WAF instance. +const defaultRxPreFilterEnabled = false diff --git a/internal/corazawaf/rxprefilter_on.go b/internal/corazawaf/rxprefilter_on.go new file mode 100644 index 000000000..62da3c349 --- /dev/null +++ b/internal/corazawaf/rxprefilter_on.go @@ -0,0 +1,12 @@ +// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build coraza.rule.rx_prefilter + +package corazawaf + +// defaultRxPreFilterEnabled is true when the coraza.rule.rx_prefilter build tag +// is set so that the entire test suite (and any deployment built with the tag) +// exercises the prefilter path without requiring an explicit SecRxPreFilter On +// directive. The directive can still override this per WAF instance. 
+const defaultRxPreFilterEnabled = true diff --git a/internal/corazawaf/waf.go b/internal/corazawaf/waf.go index d7be23a28..b1d315fe6 100644 --- a/internal/corazawaf/waf.go +++ b/internal/corazawaf/waf.go @@ -150,6 +150,10 @@ type WAF struct { // Configures the maximum number of ARGS that will be accepted for processing. ArgumentLimit int + // RxPreFilterEnabled controls whether the @rx operator uses compile-time + // literal pre-filtering. Set by the SecRxPreFilter directive. + RxPreFilterEnabled bool + memoizerID uint64 memoizer *memoize.Memoizer closeOnce gosync.Once @@ -331,9 +335,10 @@ func NewWAF() *WAF { types.AuditLogPartResponseHeaders, types.AuditLogPartAuditLogTrailer, }, - AuditLogFormat: "Native", - Logger: logger, - ArgumentLimit: 1000, + AuditLogFormat: "Native", + Logger: logger, + ArgumentLimit: 1000, + RxPreFilterEnabled: defaultRxPreFilterEnabled, } if environment.HasAccessToFS { diff --git a/internal/operators/rx.go b/internal/operators/rx.go index e3f0c2cc0..173e6cb89 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -82,16 +82,22 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { // Compile regex + prefilter together so memoize caches all artifacts as one // unit. This avoids re-parsing the AST for minMatchLength/prefilterFunc when // the same pattern appears in multiple rules. - compiled, err := memoizeDo(options.Memoizer, data, func() (any, error) { + // + // The prefilter flag is part of the key because the global cache is shared + // across all WAF instances: two WAFs with different SecRxPreFilter settings + // must not share a compiled artifact. 
+ cacheKey := fmt.Sprintf("rx:%v:%s", options.RxPreFilterEnabled, data) + compiled, err := memoizeDo(options.Memoizer, cacheKey, func() (any, error) { re, err := regexp.Compile(data) if err != nil { return nil, err } - return &rxCompiled{ - re: re, - minLen: minMatchLength(data), - prefilter: prefilterFunc(data), - }, nil + c := &rxCompiled{re: re} + if options.RxPreFilterEnabled { + c.minLen = minMatchLength(data) + c.prefilter = prefilterFunc(data) + } + return c, nil }) if err != nil { return nil, err @@ -105,12 +111,14 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { } func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool { + // Prefiltering evaluation is performed here, skipping regex evaluation for clearly non-matching inputs. if len(value) < o.minLen { return false } if o.prefilter != nil && !o.prefilter(value) { return false } + if tx.Capturing() { // FindStringSubmatchIndex returns a slice of index pairs [start0, end0, start1, end1, ...] // instead of allocating new strings for each capture group. We then slice the original diff --git a/internal/operators/rxprefilter.go b/internal/operators/rxprefilter.go index b20df0cd7..95b982f21 100644 --- a/internal/operators/rxprefilter.go +++ b/internal/operators/rxprefilter.go @@ -1,8 +1,6 @@ // Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors // SPDX-License-Identifier: Apache-2.0 -//go:build coraza.rule.rx_prefilter - // rxprefilter implements compile-time analysis of regex patterns to build cheap // pre-checks that can skip expensive regexp.Regexp evaluation when the input // clearly cannot match. 
@@ -180,7 +178,8 @@ func prefilterFunc(pattern string) func(string) bool { if len(filtered) == 0 { return nil } - if len(filtered) == 1 { + switch { + case len(filtered) == 1: needle := filtered[0] if caseInsensitive { pf = func(s string) bool { @@ -191,7 +190,7 @@ func prefilterFunc(pattern string) func(string) bool { return strings.Contains(s, needle) } } - } else if caseInsensitive { + case caseInsensitive: pf = func(s string) bool { for _, needle := range filtered { if !containsFoldASCII(s, needle) { @@ -200,7 +199,7 @@ func prefilterFunc(pattern string) func(string) bool { } return true } - } else { + default: pf = func(s string) bool { for _, needle := range filtered { if !strings.Contains(s, needle) { @@ -223,7 +222,8 @@ func prefilterFunc(pattern string) func(string) bool { return nil } filtered := v - if len(filtered) == 1 { + switch { + case len(filtered) == 1: needle := filtered[0] if caseInsensitive { pf = func(s string) bool { @@ -234,14 +234,14 @@ func prefilterFunc(pattern string) func(string) bool { return strings.Contains(s, needle) } } - } else if caseInsensitive && !allASCIIStrings([]string(filtered)) { + case caseInsensitive && !allASCIIStrings([]string(filtered)): // When case-insensitive, Aho-Corasick uses ASCII-only folding. If any // needle is non-ASCII (e.g. "ſelect" lowercased from "Select"), it could // fold to an ASCII equivalent under Go's Unicode case rules — meaning a // pure-ASCII input like "select" would match (?i)ſelect but the automaton // wouldn't find "ſelect" in "select". To avoid false negatives, bail out. return nil - } else { + default: // Build an Aho-Corasick automaton for multi-pattern matching in O(n). // Same library already used by the @pm operator. 
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{ diff --git a/internal/operators/rxprefilter_noop.go b/internal/operators/rxprefilter_noop.go deleted file mode 100644 index 5de9b7300..000000000 --- a/internal/operators/rxprefilter_noop.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors -// SPDX-License-Identifier: Apache-2.0 - -//go:build !coraza.rule.rx_prefilter - -package operators - -// minMatchLength is a no-op when the rx_prefilter build tag is not set. -// Enable with: go build -tags coraza.rule.rx_prefilter -func minMatchLength(_ string) int { return 0 } - -// prefilterFunc is a no-op when the rx_prefilter build tag is not set. -// Enable with: go build -tags coraza.rule.rx_prefilter -func prefilterFunc(_ string) func(string) bool { return nil } diff --git a/internal/operators/rxprefilter_test.go b/internal/operators/rxprefilter_test.go index f6ca91c99..79872e833 100644 --- a/internal/operators/rxprefilter_test.go +++ b/internal/operators/rxprefilter_test.go @@ -1,8 +1,6 @@ // Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors // SPDX-License-Identifier: Apache-2.0 -//go:build coraza.rule.rx_prefilter - package operators import ( @@ -64,8 +62,8 @@ func TestMinMatchLength(t *testing.T) { {"\\bhello\\b", 5}, // Unicode - {"ハロー", 9}, // 3 runes × 3 bytes each - {"café", 5}, // é is 2 bytes + {"ハロー", 9}, // 3 runes × 3 bytes each + {"café", 5}, // é is 2 bytes } for _, tc := range tests { t.Run(tc.pattern, func(t *testing.T) { @@ -83,11 +81,11 @@ func TestMinMatchLength(t *testing.T) { // accepts known matching inputs and rejects known non-matching inputs. 
func TestPrefilterFuncBuildability(t *testing.T) { tests := []struct { - pattern string - wantNil bool - desc string - match string // input that the regex matches (checked when prefilter is non-nil) - noMatch string // input that the regex does not match (checked when prefilter is non-nil) + pattern string + wantNil bool + desc string + match string // input that the regex matches (checked when prefilter is non-nil) + noMatch string // input that the regex does not match (checked when prefilter is non-nil) }{ {"hello", false, "plain literal", "say hello", "goodbye"}, {"[a-z]+", true, "char class only", "", ""}, @@ -128,9 +126,8 @@ func TestPrefilterFuncBuildability(t *testing.T) { t.Fatalf("test bug: noMatch %q actually matches %q", tc.noMatch, tc.pattern) } // Prefilter may accept (conservative) or reject — but if it rejects, it's correct - if pf(tc.noMatch) { - // Conservative pass-through: prefilter said "maybe", that's OK - } + // Conservative pass-through: prefilter said "maybe", that's OK + _ = pf(tc.noMatch) } }) } @@ -500,7 +497,7 @@ func TestPrefilterIntegrationViaNewRX(t *testing.T) { for _, tc := range tests { t.Run(fmt.Sprintf("%s/%s", tc.pattern, tc.input), func(t *testing.T) { - opts := plugintypes.OperatorOptions{Arguments: tc.pattern} + opts := plugintypes.OperatorOptions{Arguments: tc.pattern, RxPreFilterEnabled: true} op, err := newRX(opts) if err != nil { t.Fatal(err) @@ -554,7 +551,7 @@ func TestPrefilterCapturingCorrectness(t *testing.T) { for _, tc := range tests { t.Run(tc.pattern, func(t *testing.T) { - opts := plugintypes.OperatorOptions{Arguments: tc.pattern} + opts := plugintypes.OperatorOptions{Arguments: tc.pattern, RxPreFilterEnabled: true} op, err := newRX(opts) if err != nil { t.Fatal(err) @@ -593,11 +590,11 @@ func TestContainsFoldASCII(t *testing.T) { {"", "hello", false}, {"hi", "hello", false}, {"xhellox", "hello", true}, - {"HÉLLO", "hello", false}, // non-ASCII É in haystack, ASCII needle - {"Straße", "straße", true}, // 
non-ASCII needle: conservative true to avoid false negatives - {"STRASSE", "straße", true}, // non-ASCII needle: conservative true (Unicode folding is tricky) + {"HÉLLO", "hello", false}, // non-ASCII É in haystack, ASCII needle + {"Straße", "straße", true}, // non-ASCII needle: conservative true to avoid false negatives + {"STRASSE", "straße", true}, // non-ASCII needle: conservative true (Unicode folding is tricky) {"totally different", "straße", true}, // non-ASCII needle: conservative true even when absent - {"", "", true}, // empty needle always matches + {"", "", true}, // empty needle always matches {"abc", "", true}, {"SELECT", "select", true}, {"sElEcT", "select", true}, @@ -755,7 +752,7 @@ func TestPrefilterWithSMPrefix(t *testing.T) { } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - opts := plugintypes.OperatorOptions{Arguments: tc.pattern} + opts := plugintypes.OperatorOptions{Arguments: tc.pattern, RxPreFilterEnabled: true} op, err := newRX(opts) if err != nil { t.Fatal(err) @@ -829,8 +826,8 @@ func TestMemoizeSharesPrefilter(t *testing.T) { // TestPrefilterConcurrentSafety verifies the prefilter closure and Aho-Corasick // automaton can be safely called from multiple goroutines concurrently. func TestPrefilterConcurrentSafety(t *testing.T) { - pattern := "(?i)(?:union\\s+select|insert\\s+into|delete\\s+from)" - opts := plugintypes.OperatorOptions{Arguments: pattern} + RxPattern := `(?i)(?:union\s+select|insert\s+into|delete\s+from)` + opts := plugintypes.OperatorOptions{Arguments: RxPattern} op, err := newRX(opts) if err != nil { t.Fatal(err) @@ -852,7 +849,7 @@ func TestPrefilterConcurrentSafety(t *testing.T) { done := make(chan struct{}) // Compile the reference regex once, outside the goroutines. 
- re := regexp.MustCompile("(?i)(?:union\\s+select|insert\\s+into|delete\\s+from)") + re := regexp.MustCompile(RxPattern) for g := 0; g < goroutines; g++ { go func() { diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 2f32184df..879bc1c40 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -1393,6 +1393,32 @@ func directiveSecArgumentsLimit(options *DirectiveOptions) error { return nil } +// Description: Enables or disables pre-filtering for the @rx operator. +// Syntax: SecRxPreFilter On|Off +// Default: Off +// --- +// When enabled, Coraza analyses each regex pattern at rule-load time to extract required +// literal substrings and compute the minimum match length. At request time these cheap +// checks run before the full regex, allowing the engine to skip the regex entirely when +// an input clearly cannot match. +// +// Example: +// ```seclang +// SecRxPreFilter On +// ``` +func directiveSecRxPreFilter(options *DirectiveOptions) error { + if len(options.Opts) == 0 { + return errEmptyOptions + } + + b, err := parseBoolean(strings.ToLower(options.Opts)) + if err != nil { + return err + } + options.WAF.RxPreFilterEnabled = b + return nil +} + func parseBoolean(data string) (bool, error) { data = strings.ToLower(data) switch data { diff --git a/internal/seclang/directivesmap.gen.go b/internal/seclang/directivesmap.gen.go index c9c118674..adef34d1a 100644 --- a/internal/seclang/directivesmap.gen.go +++ b/internal/seclang/directivesmap.gen.go @@ -66,6 +66,7 @@ var ( _ directive = directiveSecIgnoreRuleCompilationErrors _ directive = directiveSecDataset _ directive = directiveSecArgumentsLimit + _ directive = directiveSecRxPreFilter ) var directivesMap = map[string]directive{ @@ -129,6 +130,7 @@ var directivesMap = map[string]directive{ "secignorerulecompilationerrors": directiveSecIgnoreRuleCompilationErrors, "secdataset": directiveSecDataset, "secargumentslimit": directiveSecArgumentsLimit, + 
"secrxprefilter": directiveSecRxPreFilter, // Unsupported directives "secargumentseparator": directiveUnsupported, diff --git a/internal/seclang/rule_parser.go b/internal/seclang/rule_parser.go index 9ed6151d9..7e01043b2 100644 --- a/internal/seclang/rule_parser.go +++ b/internal/seclang/rule_parser.go @@ -200,6 +200,7 @@ func (rp *RuleParser) ParseOperator(operator string) error { } if rp.options.WAF != nil { opts.Memoizer = rp.options.WAF.Memoizer() + opts.RxPreFilterEnabled = rp.options.WAF.RxPreFilterEnabled } if wd := rp.options.ParserConfig.WorkingDir; wd != "" {