diff --git a/go.mod b/go.mod index 3bb0a9bfef4..55187e9f586 100644 --- a/go.mod +++ b/go.mod @@ -136,6 +136,7 @@ require ( github.com/ebitengine/purego v0.8.4 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/gabriel-vasile/mimetype v1.4.8 // indirect + github.com/gaissmai/bart v0.26.0 // indirect github.com/gin-contrib/sse v1.0.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect diff --git a/go.sum b/go.sum index 49775536f42..04980dd9f78 100644 --- a/go.sum +++ b/go.sum @@ -172,6 +172,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/gaissmai/bart v0.26.0 h1:xOZ57E9hJLBiQaSyeZa9wgWhGuzfGACgqp4BE77OkO0= +github.com/gaissmai/bart v0.26.0/go.mod h1:GREWQfTLRWz/c5FTOsIw+KkscuFkIV5t8Rp7Nd1Td5c= github.com/gin-contrib/gzip v1.2.3 h1:dAhT722RuEG330ce2agAs75z7yB+NKvX/ZM1r8w0u2U= github.com/gin-contrib/gzip v1.2.3/go.mod h1:ad72i4Bzmaypk8M762gNXa2wkxxjbz0icRNnuLJ9a/c= github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E= diff --git a/pkg/appsec/allowlists/allowlists.go b/pkg/appsec/allowlists/allowlists.go index 6e3286ce2c9..3468ab84f1e 100644 --- a/pkg/appsec/allowlists/allowlists.go +++ b/pkg/appsec/allowlists/allowlists.go @@ -2,11 +2,12 @@ package allowlists import ( "context" - "net" + "net/netip" "strings" "sync" "time" + "github.com/gaissmai/bart" log "github.com/sirupsen/logrus" "gopkg.in/tomb.v2" @@ -15,22 +16,16 @@ import ( const allowlistRefreshInterval = 60 * time.Second -type rangeAllowlist struct { - Range net.IPNet - Description string - AllowlistName string -} - -type ipAllowlist struct { - IP net.IP +// metadata stores Description and AllowlistName for a CIDR prefix +type metadata struct { Description string AllowlistName string } type AppsecAllowlist struct { LAPIClient *apiclient.ApiClient - ips []ipAllowlist - ranges []rangeAllowlist + trie *bart.Lite // BART lite table for IP/CIDR lookups + meta map[string]*metadata // Metadata keyed by CIDR prefix string lock sync.RWMutex logger *log.Entry tomb *tomb.Tomb @@ -39,8 +34,8 @@ type AppsecAllowlist struct { func NewAppsecAllowlist(logger *log.Entry) *AppsecAllowlist { a := &AppsecAllowlist{ logger: logger.WithField("component", "appsec-allowlist"), - ips: []ipAllowlist{}, - ranges: []rangeAllowlist{}, + trie: new(bart.Lite), + meta: make(map[string]*metadata), } return a @@ -60,47 +55,81 @@ func (a *AppsecAllowlist) FetchAllowlists(ctx context.Context) error { return err } - a.lock.Lock() - defer a.lock.Unlock() - prevIPsLen := len(a.ips) - prevRangesLen := len(a.ranges) - a.ranges = []rangeAllowlist{} - a.ips = []ipAllowlist{} + // Build new trie and metadata map outside the lock to minimize contention + newTrie := new(bart.Lite) + newMeta := make(map[string]*metadata) + + var ipCount, cidrCount int for _, allowlist := range *allowlists { for _, item := range allowlist.Items { + var prefix netip.Prefix + var err error + if strings.Contains(item.Value, "/") { - _, ipNet, err := net.ParseCIDR(item.Value) + // It's a CIDR range + prefix, err = netip.ParsePrefix(item.Value) if err != nil { continue } - - a.ranges = append(a.ranges, rangeAllowlist{ - Range: *ipNet, - Description: item.Description, - AllowlistName: allowlist.Name, - }) + cidrCount++ } else { - ip := net.ParseIP(item.Value) - if ip == nil { - return nil + // It's a single IP - convert to /32 (IPv4) or /128 (IPv6) + addr, err := netip.ParseAddr(item.Value) + if err != nil { + continue } + if addr.Is4() { + prefix = netip.PrefixFrom(addr, 32) + } else { + prefix = netip.PrefixFrom(addr, 128) + } + ipCount++ + } - a.ips = append(a.ips, ipAllowlist{ - IP: ip, - Description: item.Description, - AllowlistName: allowlist.Name, - }) + // Insert into new BART lite trie + newTrie.Insert(prefix) + + // Store metadata keyed by prefix string + prefixStr := prefix.String() + newMeta[prefixStr] = &metadata{ + Description: item.Description, + AllowlistName: allowlist.Name, } } } - if (len(a.ips) != 0 || len(a.ranges) != 0) && (prevIPsLen != len(a.ips) || prevRangesLen != len(a.ranges)) { - a.logger.Infof("fetched %d IPs and %d ranges", len(a.ips), len(a.ranges)) + // Atomically swap the new trie and metadata map under lock + // Only replace if the data has actually changed to avoid unnecessary pointer swaps + a.lock.Lock() + prevSize := a.trie.Size() + newSize := newTrie.Size() + + // Quick check: if sizes differ, data definitely changed + dataChanged := prevSize != newSize + + // If sizes match, compare trie structure (expensive but avoids unnecessary replacement) + if !dataChanged { + dataChanged = !a.trie.Equal(newTrie) + } + + if !dataChanged { + // Data unchanged (same trie structure), metadata map should also be identical + // since it's built from the same source. No need to swap. + a.lock.Unlock() + a.logger.Debugf("allowlist unchanged: %d IPs and %d ranges (total: %d entries)", ipCount, cidrCount, newSize) + return nil + } + + // Data changed, atomically swap to new trie and metadata + a.trie = newTrie + a.meta = newMeta + a.lock.Unlock() + + if newSize != prevSize && newSize > 0 { + a.logger.Infof("fetched %d IPs and %d ranges (total: %d entries)", ipCount, cidrCount, newSize) } - a.logger.Debugf("fetched %d IPs and %d ranges", len(a.ips), len(a.ranges)) - a.logger.Tracef("allowlisted ips: %+v", a.ips) - a.logger.Tracef("allowlisted ranges: %+v", a.ranges) + a.logger.Debugf("fetched %d IPs and %d ranges (total: %d entries)", ipCount, cidrCount, newSize) return nil } @@ -133,33 +162,46 @@ func (a *AppsecAllowlist) IsAllowlisted(sourceIP string) (bool, string) { a.lock.RLock() defer a.lock.RUnlock() - ip := net.ParseIP(sourceIP) - if ip == nil { + ip, err := netip.ParseAddr(sourceIP) + if err != nil { a.logger.Warnf("failed to parse IP %s", sourceIP) return false, "" } - for _, allowedIP := range a.ips { - if allowedIP.IP.Equal(ip) { - a.logger.Debugf("IP %s is allowlisted by %s from %s", sourceIP, allowedIP.Description, allowedIP.AllowlistName) - reason := allowedIP.IP.String() + " from " + allowedIP.AllowlistName - if allowedIP.Description != "" { - reason += " (" + allowedIP.Description + ")" - } - return true, reason - } + // Check if IP is in the trie + if !a.trie.Contains(ip) { + return false, "" } - for _, allowedRange := range a.ranges { - if allowedRange.Range.Contains(ip) { - a.logger.Debugf("IP %s is within allowlisted range by %s from %s", sourceIP, allowedRange.Description, allowedRange.AllowlistName) - reason := allowedRange.Range.String() + " from " + allowedRange.AllowlistName - if allowedRange.Description != "" { - reason += " (" + allowedRange.Description + ")" - } - return true, reason - } + // IP is allowlisted, find the matching prefix to get metadata + // Use LPM (Longest Prefix Match) to find the most specific matching prefix + // Create a /32 (IPv4) or /128 (IPv6) prefix from the IP for LPM lookup + var queryPrefix netip.Prefix + if ip.Is4() { + queryPrefix = netip.PrefixFrom(ip, 32) + } else { + queryPrefix = netip.PrefixFrom(ip, 128) + } + prefix, ok := a.trie.LookupPrefixLPM(queryPrefix) + if !ok { + // Should not happen if Contains returned true, but handle gracefully + a.logger.Debugf("IP %s is allowlisted but no prefix found", sourceIP) + return true, sourceIP + } + + // Get metadata for the matching prefix + prefixStr := prefix.String() + meta, exists := a.meta[prefixStr] + if !exists { + // Metadata not found, return basic reason + a.logger.Debugf("IP %s is allowlisted by %s", sourceIP, prefixStr) + return true, prefixStr } - return false, "" + a.logger.Debugf("IP %s is allowlisted by %s from %s", sourceIP, meta.Description, meta.AllowlistName) + reason := prefixStr + " from " + meta.AllowlistName + if meta.Description != "" { + reason += " (" + meta.Description + ")" + } + return true, reason } diff --git a/pkg/appsec/allowlists/allowlists_bench_test.go b/pkg/appsec/allowlists/allowlists_bench_test.go new file mode 100644 index 00000000000..bc95a758814 --- /dev/null +++ b/pkg/appsec/allowlists/allowlists_bench_test.go @@ -0,0 +1,78 @@ +package allowlists + +import ( + "net/netip" + "testing" + + log "github.com/sirupsen/logrus" +) + +func BenchmarkIsAllowlisted_Small(b *testing.B) { + a := NewAppsecAllowlist(log.NewEntry(log.New())) + + // Simulate small allowlist by manually adding entries + for i := range 10 { + prefix := netip.MustParsePrefix("192.168.0.0/24") + ip := prefix.Addr() + for range i { + ip = ip.Next() + } + a.trie.Insert(netip.PrefixFrom(ip, 32)) + a.meta[netip.PrefixFrom(ip, 32).String()] = &metadata{ + Description: "test", + AllowlistName: "test-list", + } + } + + b.ResetTimer() + for b.Loop() { + _, _ = a.IsAllowlisted("192.168.0.5") + } +} + +func BenchmarkIsAllowlisted_Large(b *testing.B) { + a := NewAppsecAllowlist(log.NewEntry(log.New())) + + // Simulate large allowlist + for i := range 1000 { + prefix := netip.MustParsePrefix("192.168.0.0/24") + ip := prefix.Addr() + for range i { + ip = ip.Next() + } + a.trie.Insert(netip.PrefixFrom(ip, 32)) + a.meta[netip.PrefixFrom(ip, 32).String()] = &metadata{ + Description: "test", + AllowlistName: "test-list", + } + } + a.trie.Insert(netip.MustParsePrefix("10.0.0.0/8")) + a.meta["10.0.0.0/8"] = &metadata{ + Description: "test-range", + AllowlistName: "test-list", + } + + b.ResetTimer() + for b.Loop() { + _, _ = a.IsAllowlisted("10.0.0.1") + } +} + +func BenchmarkIsAllowlisted_NotInList(b *testing.B) { + a := NewAppsecAllowlist(log.NewEntry(log.New())) + + // Add 1000 entries + for i := range 1000 { + prefix := netip.MustParsePrefix("192.168.0.0/24") + ip := prefix.Addr() + for range i { + ip = ip.Next() + } + a.trie.Insert(netip.PrefixFrom(ip, 32)) + } + + b.ResetTimer() + for b.Loop() { + _, _ = a.IsAllowlisted("203.0.113.1") + } +} diff --git a/pkg/appsec/allowlists/allowlists_test.go b/pkg/appsec/allowlists/allowlists_test.go index b729b96ee07..47259897687 100644 --- a/pkg/appsec/allowlists/allowlists_test.go +++ b/pkg/appsec/allowlists/allowlists_test.go @@ -78,19 +78,19 @@ func TestAppsecAllowlist(t *testing.T) { res, reason = allowlistClient.IsAllowlisted("5.4.3.2") assert.True(t, res) - assert.Equal(t, "5.4.3.2 from list1 (desc_ip)", reason) + // IPs are now stored as /32 CIDR prefixes + assert.Equal(t, "5.4.3.2/32 from list1 (desc_ip)", reason) res, reason = allowlistClient.IsAllowlisted("5.4.4.42") assert.True(t, res) assert.Equal(t, "5.4.4.0/24 from list1 (desc_range)", reason) - assert.Len(t, allowlistClient.ips, 1) - assert.Len(t, allowlistClient.ranges, 1) + // Check that trie has 2 entries (1 IP + 1 CIDR range) + assert.Equal(t, 2, allowlistClient.trie.Size()) err = allowlistClient.FetchAllowlists(ctx) require.NoError(t, err) - // No duplicates should be added - assert.Len(t, allowlistClient.ips, 1) - assert.Len(t, allowlistClient.ranges, 1) + // No duplicates should be added (same size) + assert.Equal(t, 2, allowlistClient.trie.Size()) } diff --git a/pkg/parser/whitelist.go b/pkg/parser/whitelist.go index cf068677898..592a5446e47 100644 --- a/pkg/parser/whitelist.go +++ b/pkg/parser/whitelist.go @@ -6,6 +6,7 @@ import ( "github.com/expr-lang/expr" "github.com/expr-lang/expr/vm" + "github.com/gaissmai/bart" "github.com/prometheus/client_golang/prometheus" "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" @@ -16,10 +17,9 @@ import ( type Whitelist struct { Reason string `yaml:"reason,omitempty"` Ips []string `yaml:"ip,omitempty"` - B_Ips []netip.Addr Cidrs []string `yaml:"cidr,omitempty"` - B_Cidrs []netip.Prefix - Exprs []string `yaml:"expression,omitempty"` + B_Trie *bart.Lite // BART lite table for IP/CIDR lookups + Exprs []string `yaml:"expression,omitempty"` B_Exprs []*ExprWhitelist } @@ -36,7 +36,7 @@ func (n *Node) ContainsExprLists() bool { } func (n *Node) ContainsIPLists() bool { - return len(n.Whitelist.B_Ips) > 0 || len(n.Whitelist.B_Cidrs) > 0 + return n.Whitelist.B_Trie != nil && n.Whitelist.B_Trie.Size() > 0 } func (n *Node) CheckIPsWL(p *pipeline.Event) bool { @@ -50,21 +50,12 @@ func (n *Node) CheckIPsWL(p *pipeline.Event) bool { if isWhitelisted { break } - for _, v := range n.Whitelist.B_Ips { - if v == src { - n.Logger.Debugf("Event from [%s] is whitelisted by IP (%s), reason [%s]", src, v, n.Whitelist.Reason) - isWhitelisted = true - break - } - n.Logger.Tracef("whitelist: %s is not eq [%s]", src, v) - } - for _, v := range n.Whitelist.B_Cidrs { - if v.Contains(src) { - n.Logger.Debugf("Event from [%s] is whitelisted by CIDR (%s), reason [%s]", src, v, n.Whitelist.Reason) - isWhitelisted = true - break - } - n.Logger.Tracef("whitelist: %s not in [%s]", src, v) + // Use BART lite trie for fast lookup + if n.Whitelist.B_Trie.Contains(src) { + n.Logger.Debugf("Event from [%s] is whitelisted, reason [%s]", src, n.Whitelist.Reason) + isWhitelisted = true + } else { + n.Logger.Tracef("whitelist: %s not in allowlist", src) } } if isWhitelisted { @@ -110,25 +101,41 @@ func (n *Node) CheckExprWL(cachedExprEnv map[string]any, p *pipeline.Event) (boo } func (n *Node) CompileWLs() (bool, error) { + // Initialize BART lite trie if we have IPs or CIDRs + if len(n.Whitelist.Ips) > 0 || len(n.Whitelist.Cidrs) > 0 { + n.Whitelist.B_Trie = new(bart.Lite) + } + + // Convert IPs to /32 (IPv4) or /128 (IPv6) CIDR format and insert into trie for _, v := range n.Whitelist.Ips { addr, err := netip.ParseAddr(v) if err != nil { return false, fmt.Errorf("parsing whitelist: %w", err) } - n.Whitelist.B_Ips = append(n.Whitelist.B_Ips, addr) - n.Logger.Debugf("adding ip %s to whitelists", addr) + // Convert IP to /32 (IPv4) or /128 (IPv6) CIDR prefix + var prefix netip.Prefix + if addr.Is4() { + prefix = netip.PrefixFrom(addr, 32) + } else { + prefix = netip.PrefixFrom(addr, 128) + } + + n.Whitelist.B_Trie.Insert(prefix) + n.Logger.Debugf("adding ip %s (as %s) to whitelists", addr, prefix) } + // Insert CIDR ranges into trie for _, v := range n.Whitelist.Cidrs { - tnet, err := netip.ParsePrefix(v) + prefix, err := netip.ParsePrefix(v) if err != nil { return false, fmt.Errorf("parsing whitelist: %w", err) } - n.Whitelist.B_Cidrs = append(n.Whitelist.B_Cidrs, tnet) - n.Logger.Debugf("adding cidr %s to whitelists", tnet) + n.Whitelist.B_Trie.Insert(prefix) + n.Logger.Debugf("adding cidr %s to whitelists", prefix) } + // Compile expression whitelists for _, filter := range n.Whitelist.Exprs { var err error expression := &ExprWhitelist{} diff --git a/pkg/parser/whitelist_bench_test.go b/pkg/parser/whitelist_bench_test.go new file mode 100644 index 00000000000..2353205134a --- /dev/null +++ b/pkg/parser/whitelist_bench_test.go @@ -0,0 +1,147 @@ +package parser + +import ( + "net/netip" + "testing" + + "github.com/gaissmai/bart" + log "github.com/sirupsen/logrus" + + "github.com/crowdsecurity/crowdsec/pkg/pipeline" +) + +func BenchmarkCheckIPsWL_SmallAllowlist(b *testing.B) { + node := &Node{ + Logger: log.NewEntry(log.New()), + Whitelist: Whitelist{ + Reason: "test", + B_Trie: new(bart.Lite), + }, + } + + // Small allowlist: 10 IPs + 2 CIDRs + for i := range 10 { + ip := netip.MustParseAddr("192.168.0.1") + for range i { + ip = ip.Next() + } + node.Whitelist.B_Trie.Insert(netip.PrefixFrom(ip, 32)) + } + node.Whitelist.B_Trie.Insert(netip.MustParsePrefix("10.0.0.0/8")) + node.Whitelist.B_Trie.Insert(netip.MustParsePrefix("172.16.0.0/12")) + + event := &pipeline.Event{ + Meta: map[string]string{ + "source_ip": "192.168.0.5", + }, + } + + b.ResetTimer() + for b.Loop() { + _ = node.CheckIPsWL(event) + } +} + +func BenchmarkCheckIPsWL_MediumAllowlist(b *testing.B) { + node := &Node{ + Logger: log.NewEntry(log.New()), + Whitelist: Whitelist{ + Reason: "test", + B_Trie: new(bart.Lite), + }, + } + + // Medium allowlist: 100 IPs + 10 CIDRs + for i := range 100 { + ip := netip.MustParseAddr("192.168.0.1") + for range i { + ip = ip.Next() + } + node.Whitelist.B_Trie.Insert(netip.PrefixFrom(ip, 32)) + } + for i := range 10 { + base := netip.MustParseAddr("10.0.0.0") + for range i { + base = base.Next() + } + node.Whitelist.B_Trie.Insert(netip.PrefixFrom(base, 24)) + } + + event := &pipeline.Event{ + Meta: map[string]string{ + "source_ip": "192.168.0.50", + }, + } + + b.ResetTimer() + for b.Loop() { + _ = node.CheckIPsWL(event) + } +} + +func BenchmarkCheckIPsWL_LargeAllowlist(b *testing.B) { + node := &Node{ + Logger: log.NewEntry(log.New()), + Whitelist: Whitelist{ + Reason: "test", + B_Trie: new(bart.Lite), + }, + } + + // Large allowlist: 1000 IPs + 50 CIDRs + for i := range 1000 { + ip := netip.MustParseAddr("192.168.0.1") + for range i { + ip = ip.Next() + } + node.Whitelist.B_Trie.Insert(netip.PrefixFrom(ip, 32)) + } + for i := range 50 { + base := netip.MustParseAddr("10.0.0.0") + for range i * 256 { + base = base.Next() + } + node.Whitelist.B_Trie.Insert(netip.PrefixFrom(base, 24)) + } + + event := &pipeline.Event{ + Meta: map[string]string{ + "source_ip": "192.168.1.100", + }, + } + + b.ResetTimer() + for b.Loop() { + _ = node.CheckIPsWL(event) + } +} + +func BenchmarkCheckIPsWL_NotInAllowlist(b *testing.B) { + node := &Node{ + Logger: log.NewEntry(log.New()), + Whitelist: Whitelist{ + Reason: "test", + B_Trie: new(bart.Lite), + }, + } + + // Add 1000 IPs but test with IP not in list + for i := range 1000 { + ip := netip.MustParseAddr("192.168.0.1") + for range i { + ip = ip.Next() + } + node.Whitelist.B_Trie.Insert(netip.PrefixFrom(ip, 32)) + } + + event := &pipeline.Event{ + Meta: map[string]string{ + "source_ip": "203.0.113.1", // Not in allowlist + }, + } + + b.ResetTimer() + for b.Loop() { + _ = node.CheckIPsWL(event) + } +}