Skip to content
Open
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
/build/

# Exclude the autogenerated parser.output file
parser.output
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.24

require (
github.com/creasty/defaults v1.8.0
github.com/davecgh/go-spew v1.1.1
github.com/emersion/go-sasl v0.0.0-20241020182733-b788ff22d5a6
github.com/emersion/go-smtp v0.23.0
github.com/google/uuid v1.6.0
Expand All @@ -24,7 +25,6 @@ require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/caarlos0/env/v11 v11.3.1 // indirect
github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fatih/color v1.18.0 // indirect
github.com/go-sql-driver/mysql v1.9.3 // indirect
github.com/goccy/go-yaml v1.13.0 // indirect
Expand All @@ -45,5 +45,8 @@ require (
golang.org/x/net v0.41.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/text v0.27.0 // indirect
golang.org/x/tools v0.34.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

tool golang.org/x/tools/cmd/goyacc
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down
141 changes: 141 additions & 0 deletions internal/filter/filter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package filter

import (
"errors"
"regexp"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// unknown is a sentinel value: the filterableType Eval* helpers treat any
// comparison against it as a simulated evaluation failure.
const unknown string = "unknown"

// errEval is the error returned by the filterableType Eval* helpers when
// they are asked to compare against the unknown sentinel.
var errEval = errors.New("evaluation error")

// TestFilter exercises the filter package end to end: construction with
// invalid operators, evaluation errors surfaced by the Filterable
// implementation, and the boolean outcome of parsed filter expressions.
func TestFilter(t *testing.T) {
	t.Parallel()

	filterable := &filterableType{
		key:   "domain",
		value: "example.com",
	}

	t.Run("InvalidOperator", func(t *testing.T) {
		// t.Parallel() was missing here, unlike in the sibling subtests.
		t.Parallel()

		chain, err := NewChain(LogicalOp('0'), nil)
		assert.Nil(t, chain)
		assert.EqualError(t, err, "invalid logical operator provided: '0'")

		condition, err := NewCondition("column", "unknown", "value")
		assert.Nil(t, condition)
		assert.EqualError(t, err, `invalid comparison operator provided: "unknown"`)
	})

	t.Run("EvaluationError", func(t *testing.T) {
		t.Parallel()

		// Each expression compares against the unknown sentinel somewhere,
		// so evaluating the parsed filter must fail with errEval and yield false.
		expressions := []string{
			"domain=" + unknown,
			"domain!=" + unknown,
			"domain<" + unknown,
			"domain<=" + unknown,
			"domain>" + unknown,
			"domain>=" + unknown,
			"domain~" + unknown,
			"domain!~" + unknown,
			"!(domain!=" + unknown + ")",
			"domain=" + unknown + "&domain<=test.example.com",
			"domain<=" + unknown + "|domain<=test.example.com",
		}

		for _, expression := range expressions {
			f, err := Parse(expression)
			// Guard the Eval call so a parse failure can't nil-deref f,
			// mirroring the EvaluateFilter subtest below.
			if assert.NoError(t, err, "parsing %q should not return an error", expression) {
				matched, err := f.Eval(filterable)
				assert.EqualError(t, err, errEval.Error())
				// assert.False (and assert.Equal's expected-first order) replaces
				// the previous assert.Equal(t, matched, false, ...) whose swapped
				// arguments produced misleading failure output.
				assert.False(t, matched, "unexpected filter result for %q", expression)
			}
		}
	})

	t.Run("EvaluateFilter", func(t *testing.T) {
		t.Parallel()

		testdata := []struct {
			Expression string
			Expected   bool
		}{
			{"domain=example.com", true},
			{"domain!=example.com", false},
			{"domain=test.example.com", false},
			{"name!=example.com", false},
			{"domain", true},
			{"name", false},
			{"display_name", false},
			{"!name", true},
			{"domain~example*", true},
			{"domain!~example*", false},
			{"domain~example*&!domain", false},
			{"domain>a", true},
			{"domain<a", false},
			{"domain>z", false},
			{"domain<z", true},
			{"domain>=example&domain<=test.example.com", true},
			{"domain<=example|domain<=test.example.com", true},
			{"domain<=example|domain>=test.example.com", false},
		}

		for _, td := range testdata {
			f, err := Parse(td.Expression)
			if assert.NoError(t, err, "parsing %q should not return an error", td.Expression) {
				matched, err := f.Eval(filterable)
				assert.NoError(t, err)
				assert.Equal(t, td.Expected, matched, "unexpected filter result for %q", td.Expression)
			}
		}
	})
}

// filterableType is a minimal Filterable implementation used by the tests.
// It models a single key/value pair (e.g. key "domain", value "example.com"):
// EvalExists reports true only for the stored key, and the Eval* comparison
// helpers compare their argument against the stored value.
type filterableType struct {
	// key is the only column name for which EvalExists reports true.
	key string
	// value is the operand compared by the Eval* methods.
	value string
}

// EvalEqual reports whether the stored value equals the given one,
// ignoring case. Comparing against the unknown sentinel yields errEval.
func (f *filterableType) EvalEqual(_ string, value string) (bool, error) {
	if value != unknown {
		return strings.EqualFold(f.value, value), nil
	}

	return false, errEval
}

// EvalLess reports whether the stored value sorts strictly before the given
// one. Comparing against the unknown sentinel yields errEval.
func (f *filterableType) EvalLess(_ string, value string) (bool, error) {
	if value != unknown {
		return f.value < value, nil
	}

	return false, errEval
}

// EvalLike reports whether the stored value matches the given wildcard
// pattern, where '*' matches any (possibly empty) run of characters.
// Comparing against the unknown sentinel yields errEval.
//
// The pattern was previously hard-coded to "^example.*$" and the value
// parameter was ignored; the regex is now derived from value, so any
// "foo*bar"-style expression works. For the "example*" patterns used by
// the tests the resulting regex is identical to the old hard-coded one.
func (f *filterableType) EvalLike(_ string, value string) (bool, error) {
	if value == unknown {
		return false, errEval
	}

	// Quote everything except '*', which becomes ".*", and anchor the result.
	pattern := "^" + strings.ReplaceAll(regexp.QuoteMeta(value), `\*`, ".*") + "$"

	regex, err := regexp.Compile(pattern)
	if err != nil {
		return false, err
	}

	return regex.MatchString(f.value), nil
}

// EvalLessOrEqual reports whether the stored value sorts before or equal to
// the given one. Comparing against the unknown sentinel yields errEval.
func (f *filterableType) EvalLessOrEqual(_ string, value string) (bool, error) {
	if value != unknown {
		return f.value <= value, nil
	}

	return false, errEval
}

// EvalExists reports whether the given key is the single column name this
// filterable carries.
func (f *filterableType) EvalExists(key string) bool {
	return key == f.key
}
200 changes: 200 additions & 0 deletions internal/filter/lexer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
//go:generate go tool goyacc -l -v parser.output -o parser.go parser.y

package filter

import (
"errors"
"fmt"
"math"
"strings"
"text/scanner"
)

// LexerErrCode is the error code used by the lexer to indicate that it encountered an error
// while lexing the input filter string and that the parser should stop parsing.
// math.MaxInt is chosen so it cannot clash with any generated token id.
const LexerErrCode = math.MaxInt

// tokenFriendlyNames contains a list of all the defined parser tokens and their respective
// friendly names used to output in error messages. The table is applied to
// yyToknames once during init and then released.
var tokenFriendlyNames = []struct {
	have string // token name as emitted by goyacc
	want string // human-readable replacement used in error messages
}{
	{"$end", "EOF"},
	{"$unk", "unknown"},
	{"T_IDENTIFIER", "'column or value'"},
	{"T_EQUAL", "'='"},
	{"T_UNEQUAL", "'!='"},
	{"T_LIKE", "'~'"},
	{"T_UNLIKE", "'!~'"},
	{"T_LESS", "'<'"},
	{"T_GTR", "'>'"},
	{"T_LEQ", "'<='"},
	{"T_GEQ", "'>='"},
	{"T_LOR", "'|'"},
	{"T_LAND", "'&'"},
	{"T_LNOT", "'!'"},
}

// init enables verbose parser errors and rewrites the generated yyToknames
// table with human-friendly token names, so that syntax errors read
// "expecting '='" instead of "expecting T_EQUAL".
func init() {
	// Enable verbose parser errors to get more context on parsing failures.
	yyErrorVerbose = true

	for i, t := range yyToknames {
		// Replace all parser token names by their corresponding friendly names.
		for _, td := range tokenFriendlyNames {
			if t == td.have {
				yyToknames[i] = td.want
				break
			}
		}
	}
	tokenFriendlyNames = nil // Free up memory, we don't need this anymore.
}

// Parse wraps the auto generated yyParse function.
// It parses the given filter string and returns on success a Filter instance.
func Parse(expr string) (rule Filter, err error) {
	lex := new(Lexer)
	lex.IsIdentRune = isIndentRune
	lex.Init(strings.NewReader(expr))

	// Configure the scanner mode to identify only the specific tokens we are interested in.
	lex.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings
	// It's a rare case that the scanner actually will fail to scan the input string, but in these cases it will just
	// output to stderr, and we won't be able to notice this. Hence, we have to register our own error handler!
	lex.Scanner.Error = func(_ *scanner.Scanner, msg string) { lex.Error(msg) }

	defer func() {
		// All the grammar rules panic when encountering any errors while reducing the filter rules, so try
		// to recover from it and return an error instead. Since we're using named return values, we can set
		// the err value even in a deferred function. See https://go.dev/blog/defer-panic-and-recover
		if r := recover(); r != nil {
			err = errors.New(fmt.Sprint(r))
		}

		if err != nil {
			// The lexer may contain some incomplete filter rules constructed before the parser panics, so reset it.
			rule = nil
		}
	}()

	if yyParse(lex) != 0 {
		// If the parser returns a non-zero value, it means that it encountered an error while parsing.
		// The error is already set in the lexer, so we can just return it.
		if lex.err != nil {
			return nil, lex.err
		}

		// If no error was set, but the parser returned a non-zero value, we can return a generic error.
		return nil, fmt.Errorf("failed to parse filter expression: %s", expr)
	}

	return lex.rule, nil
}

// Lexer is the lexer used by the parser to tokenize the input filter string.
//
// It embeds the scanner.Scanner to use its functionality and implements the Lex method
// to provide the tokens to the parser. The Lexer also holds the current filter rule being constructed
// by the parser and the last error encountered during lexing or parsing.
//
// NOTE(review): err is never reset, so a Lexer appears to be intended for a
// single Parse call only — confirm before reusing an instance.
type Lexer struct {
	scanner.Scanner

	rule Filter // rule is the current filter rule being constructed by the parser.
	err error // err is the last error encountered by the lexer or parser.
}

// Lex returns the next token for the parser and stores its semantic value,
// if any, in yyval. It is called repeatedly by the generated yyParse.
//
// Identifiers are handed over as T_IDENTIFIER together with their text; the
// operator runes are mapped to their respective T_* tokens. Every other rune
// is returned verbatim, and a previously recorded scanner error aborts
// parsing via LexerErrCode.
func (l *Lexer) Lex(yyval *yySymType) int {
	tok := l.Scan()
	if l.err != nil {
		// The scanner already reported a failure via our error handler; tell the parser to stop.
		return LexerErrCode
	}

	if tok == scanner.Ident {
		yyval.text = l.TokenText()
		return T_IDENTIFIER
	}

	switch tok {
	case '|':
		// '|' is the logical OR, so it maps to the Any chain operator.
		// NOTE(review): All and Any were previously assigned the wrong way
		// around ('|' -> All, '&' -> Any); fixed to match the operator
		// semantics exercised by the tests (| = any matches, & = all match).
		yyval.lop = Any
		return T_LOR
	case '&':
		// '&' is the logical AND, so it maps to the All chain operator.
		yyval.lop = All
		return T_LAND
	case '~':
		yyval.cop = Like
		return T_LIKE
	case '=':
		yyval.cop = Equal
		return T_EQUAL
	case '!':
		next := l.Peek()
		switch next {
		case '=', '~':
			// Since we manually picked the next char input, we also need to advance the internal scanner
			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
			l.Scan()

			if next == '~' {
				yyval.cop = UnLike
				return T_UNLIKE
			}

			yyval.cop = UnEqual
			return T_UNEQUAL
		default:
			// A lone '!' negates the following expression.
			yyval.lop = None
			return T_LNOT
		}
	case '<':
		if next := l.Peek(); next == '=' {
			yyval.cop = LessThanEqual
			// Since we manually picked the next char input, we also need to advance the internal scanner
			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
			l.Scan()

			return T_LEQ
		}

		yyval.cop = LessThan
		return T_LESS
	case '>':
		if next := l.Peek(); next == '=' {
			yyval.cop = GreaterThanEqual
			// Since we manually picked the next char input, we also need to advance the internal scanner
			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
			l.Scan()

			return T_GEQ
		}

		yyval.cop = GreaterThan
		return T_GTR
	}

	// Anything else (e.g. parentheses) is passed through to the grammar verbatim.
	return int(tok)
}

// Error receives any syntax/semantic errors produced by the parser.
//
// The parser never returns an error when it fails to parse, but forwards the
// errors to our lexer with some additional context instead. This method then
// wraps the provided message and prefixes it with the current line, column
// number and offset. Error is equivalent to "yyerror" in the original yacc.
func (l *Lexer) Error(s string) {
	if l.err != nil {
		// Keep only the first error encountered; later ones are follow-ups.
		return
	}

	// Drop any partially constructed filter rule, it is unusable now.
	l.rule = nil
	l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s)
}

// isIndentRune provides a custom implementation of scanner.IsIdentRune.
// It reports whether ch may be part of an identifier: every rune is allowed
// except the ones the filter grammar claims as operators or parentheses.
// (The name's "Indent" spelling is kept as-is; it is referenced elsewhere.)
func isIndentRune(ch rune, _ int) bool {
	return !strings.ContainsRune("!&|~<>=()", ch)
}
Loading
Loading