Skip to content
Open
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
/build/

# Exclude the autogenerated parser.output file
parser.output
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.24

require (
github.com/creasty/defaults v1.8.0
github.com/davecgh/go-spew v1.1.1
github.com/emersion/go-sasl v0.0.0-20241020182733-b788ff22d5a6
github.com/emersion/go-smtp v0.23.0
github.com/google/uuid v1.6.0
Expand All @@ -24,7 +25,6 @@ require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/caarlos0/env/v11 v11.3.1 // indirect
github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fatih/color v1.18.0 // indirect
github.com/go-sql-driver/mysql v1.9.3 // indirect
github.com/goccy/go-yaml v1.13.0 // indirect
Expand All @@ -45,5 +45,8 @@ require (
golang.org/x/net v0.41.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/text v0.27.0 // indirect
golang.org/x/tools v0.34.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

tool golang.org/x/tools/cmd/goyacc
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down
141 changes: 141 additions & 0 deletions internal/filter/filter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package filter

import (
"errors"
"regexp"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// unknown is a sentinel value: the filterableType Eval* helpers treat any
// comparison against it as a simulated evaluation failure.
const unknown string = "unknown"

// errEval is the error returned by the filterableType Eval* helpers when
// they are asked to compare against the unknown sentinel.
var errEval = errors.New("evaluation error")

// TestFilter exercises the filter package end to end: construction with
// invalid operators, evaluation errors surfaced by the Filterable
// implementation, and the boolean outcome of parsed filter expressions.
func TestFilter(t *testing.T) {
	t.Parallel()

	filterable := &filterableType{
		key:   "domain",
		value: "example.com",
	}

	t.Run("InvalidOperator", func(t *testing.T) {
		// t.Parallel() was missing here, unlike in the sibling subtests.
		t.Parallel()

		chain, err := NewChain(LogicalOp('0'), nil)
		assert.Nil(t, chain)
		assert.EqualError(t, err, "invalid logical operator provided: '0'")

		condition, err := NewCondition("column", "unknown", "value")
		assert.Nil(t, condition)
		assert.EqualError(t, err, `invalid comparison operator provided: "unknown"`)
	})

	t.Run("EvaluationError", func(t *testing.T) {
		t.Parallel()

		// Each expression compares against the unknown sentinel somewhere,
		// so evaluating the parsed filter must fail with errEval and yield false.
		expressions := []string{
			"domain=" + unknown,
			"domain!=" + unknown,
			"domain<" + unknown,
			"domain<=" + unknown,
			"domain>" + unknown,
			"domain>=" + unknown,
			"domain~" + unknown,
			"domain!~" + unknown,
			"!(domain!=" + unknown + ")",
			"domain=" + unknown + "&domain<=test.example.com",
			"domain<=" + unknown + "|domain<=test.example.com",
		}

		for _, expression := range expressions {
			f, err := Parse(expression)
			// Guard the Eval call so a parse failure can't nil-deref f,
			// mirroring the EvaluateFilter subtest below.
			if assert.NoError(t, err, "parsing %q should not return an error", expression) {
				matched, err := f.Eval(filterable)
				assert.EqualError(t, err, errEval.Error())
				// assert.False (and assert.Equal's expected-first order) replaces
				// the previous assert.Equal(t, matched, false, ...) whose swapped
				// arguments produced misleading failure output.
				assert.False(t, matched, "unexpected filter result for %q", expression)
			}
		}
	})

	t.Run("EvaluateFilter", func(t *testing.T) {
		t.Parallel()

		testdata := []struct {
			Expression string
			Expected   bool
		}{
			{"domain=example.com", true},
			{"domain!=example.com", false},
			{"domain=test.example.com", false},
			{"name!=example.com", false},
			{"domain", true},
			{"name", false},
			{"display_name", false},
			{"!name", true},
			{"domain~example*", true},
			{"domain!~example*", false},
			{"domain~example*&!domain", false},
			{"domain>a", true},
			{"domain<a", false},
			{"domain>z", false},
			{"domain<z", true},
			{"domain>=example&domain<=test.example.com", true},
			{"domain<=example|domain<=test.example.com", true},
			{"domain<=example|domain>=test.example.com", false},
		}

		for _, td := range testdata {
			f, err := Parse(td.Expression)
			if assert.NoError(t, err, "parsing %q should not return an error", td.Expression) {
				matched, err := f.Eval(filterable)
				assert.NoError(t, err)
				assert.Equal(t, td.Expected, matched, "unexpected filter result for %q", td.Expression)
			}
		}
	})
}

// filterableType is a minimal Filterable implementation used by the tests.
// It models a single key/value pair (e.g. key "domain", value "example.com"):
// EvalExists reports true only for the stored key, and the Eval* comparison
// helpers compare their argument against the stored value.
type filterableType struct {
	// key is the only column name for which EvalExists reports true.
	key string
	// value is the operand compared by the Eval* methods.
	value string
}

// EvalEqual reports whether the stored value equals the given one,
// ignoring case. Comparing against the unknown sentinel yields errEval.
func (f *filterableType) EvalEqual(_ string, value string) (bool, error) {
	if value != unknown {
		return strings.EqualFold(f.value, value), nil
	}

	return false, errEval
}

// EvalLess reports whether the stored value sorts strictly before the given
// one. Comparing against the unknown sentinel yields errEval.
func (f *filterableType) EvalLess(_ string, value string) (bool, error) {
	if value != unknown {
		return f.value < value, nil
	}

	return false, errEval
}

// EvalLike reports whether the stored value matches the given wildcard
// pattern, where '*' matches any (possibly empty) run of characters.
// Comparing against the unknown sentinel yields errEval.
//
// The pattern was previously hard-coded to "^example.*$" and the value
// parameter was ignored; the regex is now derived from value, so any
// "foo*bar"-style expression works. For the "example*" patterns used by
// the tests the resulting regex is identical to the old hard-coded one.
func (f *filterableType) EvalLike(_ string, value string) (bool, error) {
	if value == unknown {
		return false, errEval
	}

	// Quote everything except '*', which becomes ".*", and anchor the result.
	pattern := "^" + strings.ReplaceAll(regexp.QuoteMeta(value), `\*`, ".*") + "$"

	regex, err := regexp.Compile(pattern)
	if err != nil {
		return false, err
	}

	return regex.MatchString(f.value), nil
}

// EvalLessOrEqual reports whether the stored value sorts before or equal to
// the given one. Comparing against the unknown sentinel yields errEval.
func (f *filterableType) EvalLessOrEqual(_ string, value string) (bool, error) {
	if value != unknown {
		return f.value <= value, nil
	}

	return false, errEval
}

// EvalExists reports whether the given key is the single column name this
// filterable carries.
func (f *filterableType) EvalExists(key string) bool {
	return key == f.key
}
200 changes: 200 additions & 0 deletions internal/filter/lexer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
//go:generate go tool goyacc -l -v parser.output -o parser.go parser.y

package filter

import (
"errors"
"fmt"
"math"
"strings"
"text/scanner"
)

// LexerErrCode is the error code used by the lexer to indicate that it encountered an error
// while lexing the input filter string and that the parser should stop parsing.
// math.MaxInt is chosen so it cannot clash with any generated token id.
const LexerErrCode = math.MaxInt

// tokenFriendlyNames contains a list of all the defined parser tokens and their respective
// friendly names used to output in error messages. The table is applied to
// yyToknames once during init and then released.
var tokenFriendlyNames = []struct {
	have string // token name as emitted by goyacc
	want string // human-readable replacement used in error messages
}{
	{"$end", "EOF"},
	{"$unk", "unknown"},
	{"T_IDENTIFIER", "'column or value'"},
	{"T_EQUAL", "'='"},
	{"T_UNEQUAL", "'!='"},
	{"T_LIKE", "'~'"},
	{"T_UNLIKE", "'!~'"},
	{"T_LESS", "'<'"},
	{"T_GTR", "'>'"},
	{"T_LEQ", "'<='"},
	{"T_GEQ", "'>='"},
	{"T_LOR", "'|'"},
	{"T_LAND", "'&'"},
	{"T_LNOT", "'!'"},
}

// init enables verbose parser errors and rewrites the generated yyToknames
// table with human-friendly token names, so that syntax errors read
// "expecting '='" instead of "expecting T_EQUAL".
func init() {
	// Enable verbose parser errors to get more context on parsing failures.
	yyErrorVerbose = true

	for i, t := range yyToknames {
		// Replace all parser token names by their corresponding friendly names.
		for _, td := range tokenFriendlyNames {
			if t == td.have {
				yyToknames[i] = td.want
				break
			}
		}
	}
	tokenFriendlyNames = nil // Free up memory, we don't need this anymore.
}

// Parse wraps the auto generated yyParse function.
// It parses the given filter string and returns on success a Filter instance.
func Parse(expr string) (rule Filter, err error) {
	lex := new(Lexer)
	lex.IsIdentRune = isIndentRune
	lex.Init(strings.NewReader(expr))

	// Configure the scanner mode to identify only the specific tokens we are interested in.
	lex.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings
	// It's a rare case that the scanner actually will fail to scan the input string, but in these cases it will just
	// output to stderr, and we won't be able to notice this. Hence, we have to register our own error handler!
	lex.Scanner.Error = func(_ *scanner.Scanner, msg string) { lex.Error(msg) }

	defer func() {
		// All the grammar rules panic when encountering any errors while reducing the filter rules, so try
		// to recover from it and return an error instead. Since we're using named return values, we can set
		// the err value even in a deferred function. See https://go.dev/blog/defer-panic-and-recover
		if r := recover(); r != nil {
			err = errors.New(fmt.Sprint(r))
		}

		if err != nil {
			// The lexer may contain some incomplete filter rules constructed before the parser panics, so reset it.
			rule = nil
		}
	}()

	if yyParse(lex) != 0 {
		// If the parser returns a non-zero value, it means that it encountered an error while parsing.
		// The error is already set in the lexer, so we can just return it.
		if lex.err != nil {
			return nil, lex.err
		}

		// If no error was set, but the parser returned a non-zero value, we can return a generic error.
		return nil, fmt.Errorf("failed to parse filter expression: %s", expr)
	}

	return lex.rule, nil
}

// Lexer is the lexer used by the parser to tokenize the input filter string.
//
// It embeds the scanner.Scanner to use its functionality and implements the Lex method
// to provide the tokens to the parser. The Lexer also holds the current filter rule being constructed
// by the parser and the last error encountered during lexing or parsing.
//
// NOTE(review): err is never reset, so a Lexer appears to be intended for a
// single Parse call only — confirm before reusing an instance.
type Lexer struct {
	scanner.Scanner

	rule Filter // rule is the current filter rule being constructed by the parser.
	err error // err is the last error encountered by the lexer or parser.
}

// Lex returns the next token for the parser and stores its semantic value,
// if any, in yyval. It is called repeatedly by the generated yyParse.
//
// Identifiers are handed over as T_IDENTIFIER together with their text; the
// operator runes are mapped to their respective T_* tokens. Every other rune
// is returned verbatim, and a previously recorded scanner error aborts
// parsing via LexerErrCode.
func (l *Lexer) Lex(yyval *yySymType) int {
	tok := l.Scan()
	if l.err != nil {
		// The scanner already reported a failure via our error handler; tell the parser to stop.
		return LexerErrCode
	}

	if tok == scanner.Ident {
		yyval.text = l.TokenText()
		return T_IDENTIFIER
	}

	switch tok {
	case '|':
		// '|' is the logical OR, so it maps to the Any chain operator.
		// NOTE(review): All and Any were previously assigned the wrong way
		// around ('|' -> All, '&' -> Any); fixed to match the operator
		// semantics exercised by the tests (| = any matches, & = all match).
		yyval.lop = Any
		return T_LOR
	case '&':
		// '&' is the logical AND, so it maps to the All chain operator.
		yyval.lop = All
		return T_LAND
	case '~':
		yyval.cop = Like
		return T_LIKE
	case '=':
		yyval.cop = Equal
		return T_EQUAL
	case '!':
		next := l.Peek()
		switch next {
		case '=', '~':
			// Since we manually picked the next char input, we also need to advance the internal scanner
			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
			l.Scan()

			if next == '~' {
				yyval.cop = UnLike
				return T_UNLIKE
			}

			yyval.cop = UnEqual
			return T_UNEQUAL
		default:
			// A lone '!' negates the following expression.
			yyval.lop = None
			return T_LNOT
		}
	case '<':
		if next := l.Peek(); next == '=' {
			yyval.cop = LessThanEqual
			// Since we manually picked the next char input, we also need to advance the internal scanner
			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
			l.Scan()

			return T_LEQ
		}

		yyval.cop = LessThan
		return T_LESS
	case '>':
		if next := l.Peek(); next == '=' {
			yyval.cop = GreaterThanEqual
			// Since we manually picked the next char input, we also need to advance the internal scanner
			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
			l.Scan()

			return T_GEQ
		}

		yyval.cop = GreaterThan
		return T_GTR
	}

	// Anything else (e.g. parentheses) is passed through to the grammar verbatim.
	return int(tok)
}

// Error receives any syntax/semantic errors produced by the parser.
//
// The parser never returns an error when it fails to parse, but forwards the
// errors to our lexer with some additional context instead. This method then
// wraps the provided message and prefixes it with the current line, column
// number and offset. Error is equivalent to "yyerror" in the original yacc.
func (l *Lexer) Error(s string) {
	if l.err != nil {
		// Keep only the first error encountered; later ones are follow-ups.
		return
	}

	// Drop any partially constructed filter rule, it is unusable now.
	l.rule = nil
	l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s)
}

// isIndentRune provides a custom implementation of scanner.IsIdentRune.
// It reports whether ch may be part of an identifier: every rune is allowed
// except the ones the filter grammar claims as operators or parentheses.
// (The name's "Indent" spelling is kept as-is; it is referenced elsewhere.)
func isIndentRune(ch rune, _ int) bool {
	return !strings.ContainsRune("!&|~<>=()", ch)
}
Loading
Loading