diff --git a/.gitignore b/.gitignore index 84c048a7..3b9f9945 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /build/ + +# Exclude the autogenerated parser.output file +parser.output diff --git a/go.mod b/go.mod index 4f326099..8711da75 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.24 require ( github.com/creasty/defaults v1.8.0 + github.com/davecgh/go-spew v1.1.1 github.com/emersion/go-sasl v0.0.0-20241020182733-b788ff22d5a6 github.com/emersion/go-smtp v0.23.0 github.com/google/uuid v1.6.0 @@ -24,7 +25,6 @@ require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/caarlos0/env/v11 v11.3.1 // indirect github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a // indirect - github.com/davecgh/go-spew v1.1.1 // indirect github.com/fatih/color v1.18.0 // indirect github.com/go-sql-driver/mysql v1.9.3 // indirect github.com/goccy/go-yaml v1.13.0 // indirect @@ -45,5 +45,8 @@ require ( golang.org/x/net v0.41.0 // indirect golang.org/x/sys v0.34.0 // indirect golang.org/x/text v0.27.0 // indirect + golang.org/x/tools v0.34.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) + +tool golang.org/x/tools/cmd/goyacc diff --git a/go.sum b/go.sum index 3fccd8b0..e19be027 100644 --- a/go.sum +++ b/go.sum @@ -100,6 +100,8 @@ golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= +golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= +golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go new file mode 100644 index 00000000..d4405c37 --- /dev/null +++ b/internal/filter/filter_test.go @@ -0,0 +1,141 @@ +package filter + +import ( + "errors" + "regexp" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +const unknown string = "unknown" + +var errEval = errors.New("evaluation error") + +func TestFilter(t *testing.T) { + t.Parallel() + + filterable := &filterableType{ + key: "domain", + value: "example.com", + } + + t.Run("InvalidOperator", func(t *testing.T) { + chain, err := NewChain(LogicalOp('0'), nil) + assert.Nil(t, chain) + assert.EqualError(t, err, "invalid logical operator provided: '0'") + + condition, err := NewCondition("column", "unknown", "value") + assert.Nil(t, condition) + assert.EqualError(t, err, "invalid comparison operator provided: \"unknown\"") + }) + + t.Run("EvaluationError", func(t *testing.T) { + t.Parallel() + + testInvalidData := []struct { + Expression string + }{ + {"domain=" + unknown}, + {"domain!=" + unknown}, + {"domain<" + unknown}, + {"domain<=" + unknown}, + {"domain>" + unknown}, + {"domain>=" + unknown}, + {"domain~" + unknown}, + {"domain!~" + unknown}, + {"!(domain!=" + unknown + ")"}, + {"domain=" + unknown + "&domain<=test.example.com"}, + {"domain<=" + unknown + "|domain<=test.example.com"}, + } + + for _, td := range testInvalidData { + f, err := Parse(td.Expression) + assert.NoError(t, err) + + matched, err := f.Eval(filterable) + assert.EqualError(t, err, errEval.Error()) 
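+			// When evaluation fails, the filter must also report a non-match.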
+ assert.Equal(t, matched, false, "unexpected filter result for %q", td.Expression) + } + }) + + t.Run("EvaluateFilter", func(t *testing.T) { + t.Parallel() + + testdata := []struct { + Expression string + Expected bool + }{ + {"domain=example.com", true}, + {"domain!=example.com", false}, + {"domain=test.example.com", false}, + {"name!=example.com", false}, + {"domain", true}, + {"name", false}, + {"display_name", false}, + {"!name", true}, + {"domain~example*", true}, + {"domain!~example*", false}, + {"domain~example*&!domain", false}, + {"domain>a", true}, + {"domainz", false}, + {"domain=example&domain<=test.example.com", true}, + {"domain<=example|domain<=test.example.com", true}, + {"domain<=example|domain>=test.example.com", false}, + } + + for _, td := range testdata { + f, err := Parse(td.Expression) + if assert.NoError(t, err, "parsing %q should not return an error", td.Expression) { + matched, err := f.Eval(filterable) + assert.NoError(t, err) + assert.Equal(t, td.Expected, matched, "unexpected filter result for %q", td.Expression) + } + } + }) +} + +type filterableType struct { + key string + value string +} + +func (f *filterableType) EvalEqual(_ string, value string) (bool, error) { + if value == unknown { + return false, errEval + } + + return strings.EqualFold(f.value, value), nil +} + +func (f *filterableType) EvalLess(_ string, value string) (bool, error) { + if value == unknown { + return false, errEval + } + + return f.value < value, nil +} + +func (f *filterableType) EvalLike(_ string, value string) (bool, error) { + if value == unknown { + return false, errEval + } + + regex := regexp.MustCompile("^example.*$") + return regex.MatchString(f.value), nil +} + +func (f *filterableType) EvalLessOrEqual(_ string, value string) (bool, error) { + if value == unknown { + return false, errEval + } + + return f.value <= value, nil +} + +func (f *filterableType) EvalExists(key string) bool { + return f.key == key +} diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go new file mode 100644 index 00000000..eb992133 --- /dev/null +++ b/internal/filter/lexer.go @@ -0,0 +1,200 @@ +//go:generate go tool goyacc -l -v parser.output -o parser.go parser.y + +package filter + +import ( + "errors" + "fmt" + "math" + "strings" + "text/scanner" +) + +// LexerErrCode is the error code used by the lexer to indicate that it encountered an error +// while lexing the input filter string and that the parser should stop parsing. +const LexerErrCode = math.MaxInt + +// tokenFriendlyNames contains a list of all the defined parser tokens and their respective +// friendly names used to output in error messages. +var tokenFriendlyNames = []struct { + have string + want string +}{ + {"$end", "EOF"}, + {"$unk", "unknown"}, + {"T_IDENTIFIER", "'column or value'"}, + {"T_EQUAL", "'='"}, + {"T_UNEQUAL", "'!='"}, + {"T_LIKE", "'~'"}, + {"T_UNLIKE", "'!~'"}, + {"T_LESS", "'<'"}, + {"T_GTR", "'>'"}, + {"T_LEQ", "'<='"}, + {"T_GEQ", "'>='"}, + {"T_LOR", "'|'"}, + {"T_LAND", "'&'"}, + {"T_LNOT", "'!'"}, +} + +// init just sets the global yyErrorVerbose variable to true. +func init() { + // Enable parsers error verbose to get more context of the parsing failures + yyErrorVerbose = true + + for i, t := range yyToknames { + // Replace all parser token names by their corresponding friendly names. + for _, td := range tokenFriendlyNames { + if t == td.have { + yyToknames[i] = td.want + break + } + } + } + tokenFriendlyNames = nil // Free up memory, we don't need this anymore. 
+}
+
+// Parse wraps the auto-generated yyParse function.
+// It parses the given filter string and, on success, returns a Filter instance.
+func Parse(expr string) (rule Filter, err error) {
+	lex := new(Lexer)
+	lex.IsIdentRune = isIndentRune
+	lex.Init(strings.NewReader(expr))
+
+	// Configure the scanner mode to identify only the specific tokens we are interested in.
+	lex.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings
+	// It is rare for the scanner to fail to scan the input string, but when it does, it only writes to stderr
+	// and we would never notice. Hence, we have to register our own error handler!
+	lex.Scanner.Error = func(_ *scanner.Scanner, msg string) { lex.Error(msg) }
+
+	defer func() {
+		// All the grammar rules panic when they encounter an error while reducing the filter rules, so try
+		// to recover from that and return an error instead. Since we're using named return values, we can set
+		// the err value even in a deferred function. See https://go.dev/blog/defer-panic-and-recover
+		if r := recover(); r != nil {
+			err = errors.New(fmt.Sprint(r))
+		}
+
+		if err != nil {
+			// The lexer may contain an incomplete filter rule constructed before the parser panicked, so reset it.
+			rule = nil
+		}
+	}()
+
+	if yyParse(lex) != 0 {
+		// If the parser returns a non-zero value, it means that it encountered an error while parsing.
+		// The error is already set in the lexer, so we can just return it.
+		if lex.err != nil {
+			return nil, lex.err
+		}
+
+		// If no error was set, but the parser returned a non-zero value, we can return a generic error.
+		return nil, fmt.Errorf("failed to parse filter expression: %s", expr)
+	}
+
+	return lex.rule, nil
+}
+
+// Lexer is the lexer used by the parser to tokenize the input filter string.
+//
+// It embeds scanner.Scanner to use its functionality and implements the Lex method
+// to provide the tokens to the parser. The Lexer also holds the current filter rule being constructed
+// by the parser and the last error encountered during lexing or parsing.
+type Lexer struct {
+	scanner.Scanner
+
+	rule Filter // rule is the current filter rule being constructed by the parser.
+	err  error  // err is the last error encountered by the lexer or parser.
+}
+
+// Lex scans the next token from the input, stores its value in yyval and returns its parser token type.
+func (l *Lexer) Lex(yyval *yySymType) int {
+	tok := l.Scan()
+	if l.err != nil {
+		return LexerErrCode
+	}
+
+	if tok == scanner.Ident {
+		yyval.text = l.TokenText()
+		return T_IDENTIFIER
+	}
+
+	switch tok {
+	case '|':
+		yyval.lop = Any
+		return T_LOR
+	case '&':
+		yyval.lop = All
+		return T_LAND
+	case '~':
+		yyval.cop = Like
+		return T_LIKE
+	case '=':
+		yyval.cop = Equal
+		return T_EQUAL
+	case '!':
+		next := l.Peek()
+		switch next {
+		case '=', '~':
+			// Since we manually picked the next char input, we also need to advance the internal scanner
+			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
+			l.Scan()
+
+			if next == '~' {
+				yyval.cop = UnLike
+				return T_UNLIKE
+			} else {
+				yyval.cop = UnEqual
+				return T_UNEQUAL
+			}
+		default:
+			yyval.lop = None
+			return T_LNOT
+		}
+	case '<':
+		if next := l.Peek(); next == '=' {
+			yyval.cop = LessThanEqual
+			// Since we manually picked the next char input, we also need to advance the internal scanner
+			// states by calling Scan. Otherwise, the same rune will be scanned multiple times.
+ l.Scan() + + return T_LEQ + } + + yyval.cop = LessThan + return T_LESS + case '>': + if next := l.Peek(); next == '=' { + yyval.cop = GreaterThanEqual + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. + l.Scan() + + return T_GEQ + } + + yyval.cop = GreaterThan + return T_GTR + } + + return int(tok) +} + +// Error receives any syntax/semantic errors produced by the parser. +// +// The parser never returns an error when it fails to parse, but will forward the errors to our lexer with some +// additional context instead. This function then wraps the provided err and adds line, column number and offset +// to the error string. Error is equivalent to "yyerror" in the original yacc. +func (l *Lexer) Error(s string) { + // Don't overwrite the error if it was already set, since we want to keep the first error encountered. + if l.err == nil { + // Always reset the current filter rule when encountering an error. + l.rule = nil + l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s) + } +} + +// isIndentRune provides custom implementation of scanner.IsIdentRune. +// This function determines whether a given character is allowed to be part of an identifier. +func isIndentRune(ch rune, _ int) bool { + return ch != '!' && ch != '&' && ch != '|' && ch != '~' && ch != '<' && ch != '>' && + ch != '=' && ch != '(' && ch != ')' +} diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 71dfe6b4..9080af61 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -1,357 +1,549 @@ +// Code generated by goyacc -l -v parser.output -o parser.go parser.y. DO NOT EDIT. + package filter -import ( - "fmt" - "net/url" - "strings" -) +import __yyfmt__ "fmt" -type Parser struct { - tag string - pos, length, openParenthesis int -} +import "net/url" -// Parse parses an object filter expression. -func Parse(expression string) (Filter, error) { - parser := &Parser{tag: expression, length: len(expression)} - if parser.length == 0 { - return &Chain{op: All}, nil - } +type yySymType struct { + yys int + expr Filter + lop LogicalOp + cop CompOperator + text string +} - return parser.readFilter(0, "", nil) +const T_IDENTIFIER = 57346 +const T_EQUAL = 57347 +const T_UNEQUAL = 57348 +const T_LIKE = 57349 +const T_UNLIKE = 57350 +const T_LESS = 57351 +const T_GTR = 57352 +const T_LEQ = 57353 +const T_GEQ = 57354 +const T_LOR = 57355 +const T_LAND = 57356 +const T_LNOT = 57357 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "T_IDENTIFIER", + "T_EQUAL", + "T_UNEQUAL", + "T_LIKE", + "T_UNLIKE", + "T_LESS", + "T_GTR", + "T_LEQ", + "T_GEQ", + "T_LOR", + "T_LAND", + "T_LNOT", + "'('", + "')'", } -// readFilter reads the entire filter from the Parser.tag and derives a filter.Filter from it. -// Returns an error on parsing failure. -func (p *Parser) readFilter(nestingLevel int, operator string, rules []Filter) (Filter, error) { - negate := false - for p.pos < p.length { - condition, err := p.readCondition() +var yyStatenames = [...]string{} + +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 + +// reduceFilter reduces the left and right filters using the specified logical operator. 
+// +// If the left hand side filter is already of type *Chain and the provided operator is the same as the +// operator of the left hand side filter, it will not create a new chain but instead add the right hand +// side filter to the existing chain. This avoids creating nested chains of the same operator type, such as +// &Chain{op: All, &Chain{op: All, ...}} and keeps the filter structure flat. If the left hand side filter is +// not a *Chain, it will create a new chain with the specified operator and add both filters filter to it. +// +// Returns the resulting filter chain or an error if the creation of the chain fails. +func reduceFilter(left Filter, op LogicalOp, right Filter) (Filter, error) { + chain, ok := left.(*Chain) + if !ok || chain.op != op { + var err error + chain, err = NewChain(op, left) if err != nil { return nil, err } + } + chain.rules = append(chain.rules, right) - next := p.readChar() - if condition == nil { - if next == "!" { - negate = true - continue - } - - if operator == "" && len(rules) > 0 && (next == "&" || next == "|") { - operator = next - continue - } - - if next == "" { - break - } - - if next == ")" { - p.openParenthesis-- - - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } - - break - } - - return nil, p.parseError(next, "") - } - - if next == "(" { - if p.nextChar() == "&" || p.nextChar() == "|" { - // When a logical operator follows directly after the opening parenthesis "(", - // this can't be a valid expression. E.g. "!(&" - next = p.readChar() + return chain, nil +} - return nil, p.parseError(next, "") - } +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} - p.openParenthesis++ +const yyPrivate = 57344 - op := "" - if negate { - op = "!" - } +const yyLast = 34 - rule, err := p.readFilter(nestingLevel+1, op, nil) - if err != nil { - return nil, err - } - - rules = append(rules, rule) - negate = false - continue - } +var yyAct = [...]int8{ + 9, 10, 10, 3, 25, 8, 11, 12, 13, 14, + 15, 16, 18, 17, 19, 8, 6, 7, 9, 10, + 1, 5, 4, 8, 2, 0, 6, 7, 21, 20, + 22, 23, 0, 24, +} - if next == operator { - continue - } +var yyPact = [...]int16{ + 1, 5, -1000, -1000, -1000, 2, 11, 1, -1000, 1, + 1, 19, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -13, -12, -1000, -1000, -1000, +} - // When the current operator is a "!", the next one can't be a logical operator. - if operator != "!" && (next == "&" || next == "|") { - if operator == "&" { - if len(rules) > 1 { - rules = []Filter{&Chain{op: All, rules: rules}} - } +var yyPgo = [...]int8{ + 0, 20, 24, 22, 6, 21, +} - operator = next - } else if operator == "|" || (operator == "!" 
&& next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +var yyR1 = [...]int8{ + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, + 5, 4, 4, 4, 4, 4, 4, 4, 4, +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +var yyR2 = [...]int8{ + 0, 1, 1, 1, 3, 3, 1, 3, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, +} - rules = append(rules, rule) - } +var yyChk = [...]int16{ + -1000, -1, -2, 2, -3, -5, 15, 16, 4, 13, + 14, -4, 5, 6, 7, 8, 9, 11, 10, 12, + -3, -1, -1, -1, -5, 17, +} - continue - } +var yyDef = [...]int8{ + 0, -2, 1, 2, 3, 6, 0, 0, 10, 0, + 0, 0, 11, 12, 13, 14, 15, 16, 17, 18, + 8, 0, 4, 5, 7, 9, +} - return nil, p.parseError(next, fmt.Sprintf("operator level %d", nestingLevel)) - } else { - if negate { - negate = false - rules = append(rules, &Chain{op: None, rules: []Filter{condition}}) - } else { - rules = append(rules, condition) - } +var yyTok1 = [...]int8{ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 16, 17, +} - if next == "" { - break - } +var yyTok2 = [...]int8{ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, +} - if next == ")" { - p.openParenthesis-- +var yyTok3 = [...]int8{ + 0, +} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} - break - } +/* parser for yacc output */ - return nil, p.parseError(next, "") - } +var ( + yyDebug = 0 + yyErrorVerbose = false +) - if next == operator { - continue - } +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} - if next == "&" || next == "|" { - if operator == "" || operator == "&" { - if operator == "&" && len(rules) > 1 { - all := &Chain{op: All, rules: rules} - rules = []Filter{all} - } +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} - operator = next - } else if operator == "" || (operator == "!" 
&& next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +func (p *yyParserImpl) Lookahead() int { + return p.char +} - rules = append(rules, rule) - } +func yyNewParser() yyParser { + return &yyParserImpl{} +} - continue - } +const yyFlag = -1000 - return nil, p.parseError(next, "") +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] } } + return __yyfmt__.Sprintf("tok-%v", c) +} - if nestingLevel == 0 && p.pos < p.length { - return nil, p.parseError(operator, "Did not read full filter") +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } } + return __yyfmt__.Sprintf("state-%v", s) +} - if nestingLevel == 0 && p.openParenthesis > 0 { - return nil, fmt.Errorf("invalid filter '%s', missing %d closing ')' at pos %d", p.tag, p.openParenthesis, p.pos) - } +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 - if nestingLevel == 0 && p.openParenthesis < 0 { - return nil, fmt.Errorf("invalid filter '%s', unexpected closing ')' at pos %d", p.tag, p.pos) + if !yyErrorVerbose { + return "syntax error" } - var chain Filter - switch operator { - case "&": - chain = &Chain{op: All, rules: rules} - case "|": - chain = &Chain{op: Any, rules: rules} - case "!": - chain = &Chain{op: None, rules: rules} - case "": - if nestingLevel == 0 && rules != nil { - // There is only one filter tag, no chain - return rules[0], nil + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg } - - chain = &Chain{op: All, rules: rules} - default: - return nil, p.parseError(operator, "") } - return chain, nil -} + res := "syntax error: unexpected " + yyTokname(lookAhead) -// readCondition reads the next filter.Filter. -// returns nil if there is no char to read and an error on parsing failure. -func (p *Parser) readCondition() (Filter, error) { - column, err := p.readColumn() - if err != nil || column == "" { - return nil, err - } + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) - operator := "" - if strings.Contains("=>= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } } - if operator == "" { - return NewExists(column), nil - } + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } - if strings.Contains(">= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) } - } - value, err := p.readValue() - if err != nil { - return nil, err + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } } - condition, err := p.createCondition(column, operator, value) - if err != nil { - return nil, err + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) } - - return condition, nil + return res } -// createCondition creates a filter.Filter based on the given operator. 
-// returns nil when invalid operator is given. -func (p *Parser) createCondition(column string, operator string, value string) (Filter, error) { - column = strings.TrimSpace(column) - switch operator { - case "=": - if strings.Contains(value, "*") { - return &Condition{op: Like, column: column, value: value}, nil +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out } - - return &Condition{op: Equal, column: column, value: value}, nil - case "!=": - if strings.Contains(value, "*") { - return &Condition{op: UnLike, column: column, value: value}, nil + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out } + } - return &Condition{op: UnEqual, column: column, value: value}, nil - case ">": - return &Condition{op: GreaterThan, column: column, value: value}, nil - case ">=": - return &Condition{op: GreaterThanEqual, column: column, value: value}, nil - case "<": - return &Condition{op: LessThan, column: column, value: value}, nil - case "<=": - return &Condition{op: LessThanEqual, column: column, value: value}, nil - default: - return nil, fmt.Errorf("invalid operator %s provided", operator) +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token } -// readColumn reads a column name from the Parser.tag. -// returns empty string if there is no char to read. -func (p *Parser) readColumn() (string, error) { - return url.QueryUnescape(p.readUntil("=()&|><") - if value == "" { - return "", nil +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. 
+ yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) } - return url.QueryUnescape(value) -} + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate -// readUntil reads chars until any of the given characters -// May return empty string if there is no char to read -func (p *Parser) readUntil(chars string) string { - var buffer string - for char := p.readChar(); char != ""; char = p.readChar() { - if strings.Contains(chars, char) { - p.pos-- - break +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- } - - buffer += char + goto yystack } - return buffer -} - -// readChar peeks the next char of the Parser.tag and increments the Parser.pos by one -// returns empty if there is no char to read -func (p *Parser) readChar() string { - if p.pos < p.length { - pos := p.pos - p.pos++ +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } - return string(p.tag[pos]) + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } - return "" -} + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... 
abort */ + goto ret1 -// nextChar peeks the next char from the parser tag -// returns empty string if there is no char to read -func (p *Parser) nextChar() string { - if p.pos < p.length { - return string(p.tag[p.pos]) + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } } - return "" -} + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } -// parseError returns a formatted and detailed parser error. -// If you don't provide the char that causes the parser to fail, the char at `p.pos` is automatically used. -// By specifying the `msg` arg you can provide additional err hints that can help debugging. -func (p *Parser) parseError(invalidChar string, msg string) error { - if invalidChar == "" { - pos := p.pos - if p.pos == p.length { - pos-- + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. + if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) } - - invalidChar = string(p.tag[pos]) } + // dummy call; replaced with literal code + switch yynt { - if msg != "" { - msg = ": " + msg + case 1: + yyDollar = yyS[yypt-1 : yypt+1] + { + yylex.(*Lexer).rule = yyDollar[1].expr + } + case 2: + yyDollar = yyS[yypt-1 : yypt+1] + { + return 1 // We don't recover from errors, so give up parsing. + } + case 4: + yyDollar = yyS[yypt-3 : yypt+1] + { + v, err := reduceFilter(yyDollar[1].expr, Any, yyDollar[3].expr) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + yyVAL.expr = v + } + case 5: + yyDollar = yyS[yypt-3 : yypt+1] + { + v, err := reduceFilter(yyDollar[1].expr, All, yyDollar[3].expr) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + yyVAL.expr = v + } + case 6: + yyDollar = yyS[yypt-1 : yypt+1] + { + yyVAL.expr = NewExists(yyDollar[1].text) + } + case 7: + yyDollar = yyS[yypt-3 : yypt+1] + { + cond, err := NewCondition(yyDollar[1].text, yyDollar[2].cop, yyDollar[3].text) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + yyVAL.expr = cond + } + case 8: + yyDollar = yyS[yypt-2 : yypt+1] + { + // NewChain is only going to return an error if an invalid operator is specified, and since + // we explicitly provide the None operator, we don't expect an error to be returned. 
+ yyVAL.expr, _ = NewChain(None, yyDollar[2].expr) + } + case 9: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.expr = yyDollar[2].expr + } + case 10: + yyDollar = yyS[yypt-1 : yypt+1] + { + column, err := url.QueryUnescape(yyDollar[1].text) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + yyVAL.text = column + } } - - return fmt.Errorf("invalid filter '%s', unexpected %s at pos %d%s", p.tag, invalidChar, p.pos, msg) + goto yystack /* stack new state and value */ } diff --git a/internal/filter/parser.y b/internal/filter/parser.y new file mode 100644 index 00000000..4eab0a7d --- /dev/null +++ b/internal/filter/parser.y @@ -0,0 +1,147 @@ +%{ + +package filter + +import "net/url" + +%} + +%union { + expr Filter + lop LogicalOp + cop CompOperator + text string +} + +%token T_IDENTIFIER + +%token T_EQUAL +%token T_UNEQUAL +%token T_LIKE +%token T_UNLIKE +%token T_LESS +%token T_GTR +%token T_LEQ +%token T_GEQ + +%token T_LOR +%token T_LAND +%token T_LNOT + +%type filter_rule +%type filter_chain +%type condition_expr + +%type comparison_op +%type identifier + +%left T_LOR +%left T_LAND +%nonassoc T_EQUAL T_UNEQUAL T_LIKE T_UNLIKE +%nonassoc T_LESS T_LEQ T_GTR T_GEQ +%left T_LNOT +%left '(' + +%% + +filter_rule: filter_chain + { + yylex.(*Lexer).rule = $1 + } + | error + { + return 1 // We don't recover from errors, so give up parsing. + } + ; + +filter_chain: condition_expr + | filter_rule T_LOR filter_rule + { + v, err := reduceFilter($1, Any, $3) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + $$ = v + } + | filter_rule T_LAND filter_rule + { + v, err := reduceFilter($1, All, $3) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + $$ = v + } + ; + +condition_expr: identifier + { + $$ = NewExists($1) + } + | identifier comparison_op identifier + { + cond, err := NewCondition($1, $2, $3) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + $$ = cond + } + | T_LNOT condition_expr + { + // NewChain is only going to return an error if an invalid operator is specified, and since + // we explicitly provide the None operator, we don't expect an error to be returned. + $$, _ = NewChain(None, $2) + } + | '(' filter_rule ')' + { + $$ = $2 + } + ; + +identifier: T_IDENTIFIER + { + column, err := url.QueryUnescape($1) + if err != nil { + yylex.Error(err.Error()) + return 1 + } + $$ = column + } + ; + +comparison_op: T_EQUAL + | T_UNEQUAL + | T_LIKE + | T_UNLIKE + | T_LESS + | T_LEQ + | T_GTR + | T_GEQ + ; + +%% + +// reduceFilter reduces the left and right filters using the specified logical operator. +// +// If the left hand side filter is already of type *Chain and the provided operator is the same as the +// operator of the left hand side filter, it will not create a new chain but instead add the right hand +// side filter to the existing chain. This avoids creating nested chains of the same operator type, such as +// &Chain{op: All, &Chain{op: All, ...}} and keeps the filter structure flat. If the left hand side filter is +// not a *Chain, it will create a new chain with the specified operator and add both filters filter to it. +// +// Returns the resulting filter chain or an error if the creation of the chain fails. 
+func reduceFilter(left Filter, op LogicalOp, right Filter) (Filter, error) { + chain, ok := left.(*Chain) + if !ok || chain.op != op { + var err error + chain, err = NewChain(op, left) + if err != nil { + return nil, err + } + } + chain.rules = append(chain.rules, right) + + return chain, nil +} diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index ca386f67..38cf61b5 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -1,65 +1,95 @@ package filter import ( - "github.com/stretchr/testify/assert" "strings" "testing" + + "github.com/stretchr/testify/assert" ) func TestParser(t *testing.T) { t.Parallel() - t.Run("MissingLogicalOperatorsAfterConditionsAreDetected", func(t *testing.T) { - _, err := Parse("(a=b|c=d)e=f") - - expected := "invalid filter '(a=b|c=d)e=f', unexpected e at pos 10: Expected logical operator" - assert.EqualError(t, err, expected, "Errors should be the same") - }) + t.Run("ParseInvalidFilters", func(t *testing.T) { + t.Parallel() - t.Run("MissingLogicalOperatorsAfterOperatorsAreDetected", func(t *testing.T) { - _, err := Parse("(a=b|c=d|)e=f") + _, err := Parse("(a=b|c=d)e=f") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected 'column or value', expecting '|' or '&'") - expected := "invalid filter '(a=b|c=d|)e=f', unexpected e at pos 11: Expected logical operator" - assert.EqualError(t, err, expected, "Errors should be the same") - }) + _, err = Parse("(a=b|c=d|)e=f") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected ')', expecting 'column or value' or '!' or '('") - t.Run("ParserIdentifiesInvalidExpression", func(t *testing.T) { - _, err := Parse("col=(") - assert.EqualError(t, err, "invalid filter 'col=(', unexpected ( at pos 5", "Errors should be the same") + _, err = Parse("col=(") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected '(', expecting 'column or value'") _, err = Parse("(((x=a)&y=b") - assert.EqualError(t, err, "invalid filter '(((x=a)&y=b', missing 2 closing ')' at pos 11", "Errors should be the same") + assert.EqualError(t, err, "1:12 (11): syntax error: unexpected EOF, expecting '|' or '&' or ')'") _, err = Parse("(x=a)&y=b)") - assert.EqualError(t, err, "invalid filter '(x=a)&y=b)', unexpected ) at pos 10", "Errors should be the same") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected ')', expecting '|' or '&'") _, err = Parse("!(&") - assert.EqualError(t, err, "invalid filter '!(&', unexpected & at pos 3", "Errors should be the same") - - _, err = Parse("!(!&") - assert.EqualError(t, err, "invalid filter '!(!&', unexpected & at pos 4: operator level 1", "Errors should be the same") - - _, err = Parse("!(|test") - assert.EqualError(t, err, "invalid filter '!(|test', unexpected | at pos 3", "Errors should be the same") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected '&', expecting 'column or value' or '!' 
or '('") _, err = Parse("foo&bar=(te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=(te(st)', unexpected ( at pos 9", "Errors should be the same") + assert.EqualError(t, err, "1:9 (8): syntax error: unexpected '(', expecting 'column or value'") _, err = Parse("foo&bar=te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=te(st)', unexpected ( at pos 11", "Errors should be the same") + assert.EqualError(t, err, "1:11 (10): syntax error: unexpected '(', expecting '|' or '&'") _, err = Parse("foo&bar=test)") - assert.EqualError(t, err, "invalid filter 'foo&bar=test)', unexpected ) at pos 13", "Errors should be the same") + assert.EqualError(t, err, "1:13 (12): syntax error: unexpected ')', expecting '|' or '&'") _, err = Parse("!()|&()&)") - assert.EqualError(t, err, "invalid filter '!()|&()&)', unexpected closing ')' at pos 9", "Errors should be the same") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected ')', expecting 'column or value' or '!' or '('") + + _, err = Parse("=foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected '=', expecting 'column or value' or '!' or '('") + + _, err = Parse("foo>") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected EOF, expecting 'column or value'") + + _, err = Parse("foo==") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected '=', expecting 'column or value'") + + _, err = Parse("=>foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected '=', expecting 'column or value' or '!' or '('") + + _, err = Parse("&foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected '&', expecting 'column or value' or '!' or '('") + + _, err = Parse("&&foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected '&', expecting 'column or value' or '!' or '('") + + _, err = Parse("(&foo=bar)") + assert.EqualError(t, err, "1:2 (1): syntax error: unexpected '&', expecting 'column or value' or '!' or '('") + + _, err = Parse("(foo=bar|)") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected ')', expecting 'column or value' or '!' or '('") + + _, err = Parse("((((((") + assert.EqualError(t, err, "1:7 (6): syntax error: unexpected EOF, expecting 'column or value' or '!' or '('") + + _, err = Parse("foo&bar&col=val!=val") + assert.EqualError(t, err, "1:17 (16): syntax error: unexpected '!=', expecting '|' or '&'") + + _, err = Parse("col%7umn") + assert.EqualError(t, err, "1:1 (0): invalid URL escape \"%7u\"") + + _, err = Parse("((0&((((((((((((((((((((((0=0)") + assert.EqualError(t, err, "1:31 (30): syntax error: unexpected EOF, expecting '|' or '&' or ')'") + + // IPL web filter parser accepts such invalid strings, but our Lexer doesn't. 
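+		// The scanner reports such input through the error handler registered in Parse, so it surfaces as a parse error.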
+ _, err = Parse("foo\x00") + assert.EqualError(t, err, "1:1 (0): invalid character NUL") + + _, err = Parse("\xff") + assert.EqualError(t, err, "0:0 (0): invalid UTF-8 encoding") }) -} -func TestFilter(t *testing.T) { - t.Parallel() + t.Run("ParseAllKindOfSimpleFilters", func(t *testing.T) { + t.Parallel() - t.Run("ParserIdentifiesAllKindOfFilters", func(t *testing.T) { rule, err := Parse("foo=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) expected := &Condition{op: Equal, column: "foo", value: "bar"} @@ -70,12 +100,12 @@ func TestFilter(t *testing.T) { expected = &Condition{op: UnEqual, column: "foo", value: "bar"} assert.Equal(t, expected, rule) - rule, err = Parse("foo=bar*") + rule, err = Parse("foo~bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Condition{op: Like, column: "foo", value: "bar*"} assert.Equal(t, expected, rule) - rule, err = Parse("foo!=bar*") + rule, err = Parse("foo!~bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Condition{op: UnLike, column: "foo", value: "bar*"} assert.Equal(t, expected, rule) @@ -119,42 +149,282 @@ func TestFilter(t *testing.T) { rule, err = Parse("foo") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.Equal(t, &Exists{column: "foo"}, rule) + }) - rule, err = Parse("!(foo=bar|bar=foo)&(foo=bar|bar=foo)") + t.Run("ParseChain", func(t *testing.T) { + t.Parallel() + + var expected Filter + rule, err := Parse("!foo=bar") + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} assert.Nil(t, err, "There should be no errors but got: %s", err) + assert.Equal(t, expected, rule) - expectedChain := &Chain{op: All, rules: []Filter{ - &Chain{op: None, rules: []Filter{ + rule, err = Parse("foo=bar&bar=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar=foo|col=val") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: Any, rules: []Filter{ + &Chain{op: All, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, + &Condition{op: Equal, column: "col", value: "val"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar|bar=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: Any, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("(foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Condition{op: Equal, column: "foo", value: "bar"} + assert.Equal(t, expected, rule) + + rule, err = Parse("(!foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!foo=bar)") + assert.Nil(t, err, "There should be no errors but got: 
%s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: None, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar|bar=foo)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ &Chain{op: Any, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, }} - assert.Equal(t, expectedChain, rule) - }) + assert.Equal(t, expected, rule) - t.Run("ParserIdentifiesSingleCondition", func(t *testing.T) { - rule, err := Parse("foo=bar") + rule, err = Parse("((!foo=bar)&bar!=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) - expected := &Condition{op: Equal, column: "foo", value: "bar"} - assert.Equal(t, expected, rule, "Parser does not parse single condition correctly") + rule, err = Parse("!foo&!bar") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, + &Chain{op: None, rules: []Filter{&Exists{column: "bar"}}}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!foo|bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, + &Exists{column: "bar"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!(foo|bar))") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ + &Exists{column: "foo"}, + &Exists{column: "bar"}}, + }, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar!=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar|bar=foo)&(foo!=bar|bar!=foo)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }}, + }}, + &Chain{op: Any, rules: []Filter{ + &Condition{op: UnEqual, column: "foo", value: "bar"}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar!=foo&john>doe|doedoe|doedoe|doedoe|doebar") @@ -205,10 +481,41 @@ func FuzzParser(f *testing.F) { f.Add("col%29umn>val%29ue") f.Fuzz(func(t *testing.T, expr string) { - _, err := Parse(expr) + rule, err := Parse(expr) + t.Logf("Parsing filter expression %q - ERROR: %v", expr, err) if strings.Count(expr, "(") != strings.Count(expr, ")") { assert.Error(t, err) + assert.Nil(t, rule) + } else if err == nil && !strings.ContainsAny(expr, "!&|!>~<=") { + assert.IsType(t, new(Exists), rule) } }) } + +func BenchmarkParser(b *testing.B) { + expr := 
"foo=bar&bar!=foo&column=value" + // Add more complexity to the expression by nesting conditions + for i := 0; i < 100; i++ { + switch i % 4 { + case 0: + expr = "(" + expr + "&col" + string(rune('A'+(i%26))) + "=val" + string(rune('a'+(i%26))) + ")" + case 1: + expr = "!(" + expr + "|col" + string(rune('A'+(i%26))) + "!=val" + string(rune('a'+(i%26))) + ")" + case 2: + expr = "(" + expr + "|col" + string(rune('A'+(i%26))) + ">=val" + string(rune('a'+(i%26))) + ")" + case 3: + expr = "!(" + expr + "&col" + string(rune('A'+(i%26))) + "<=val" + string(rune('a'+(i%26))) + ")" + } + } + expr += "&" + strings.Repeat("x~y*|", 50) + "z~*w*|!((foo=bar|baz!=qux)&(!(alpha=beta)|gamma<=delta))" + b.Logf("Benchmarking filter expression: %s", expr) + + b.ReportAllocs() // Report allocations statistics + + for b.Loop() { + if _, err := Parse(expr); err != nil { + b.Fatalf("Failed to parse filter expression: %s", err) + } + } +} diff --git a/internal/filter/types.go b/internal/filter/types.go index 4c1104d0..032b6de1 100644 --- a/internal/filter/types.go +++ b/internal/filter/types.go @@ -5,25 +5,33 @@ import ( ) // LogicalOp is a type used for grouping the logical operators of a filter string. -type LogicalOp string +type LogicalOp byte const ( // None represents a filter chain type that matches when none of its ruleset matches. - None LogicalOp = "!" + None LogicalOp = '!' // All represents a filter chain type that matches when all of its ruleset matches. - All LogicalOp = "&" + All LogicalOp = '&' // Any represents a filter chain type that matches when at least one of its ruleset matches. - Any LogicalOp = "|" + Any LogicalOp = '|' ) // Chain is a filter type that wraps other filter rules and itself. // Therefore, it implements the Filter interface to allow it to be part of its ruleset. -// It supports also adding and popping filter rules individually. type Chain struct { op LogicalOp // The filter chain operator to be used to evaluate the rules rules []Filter } +func NewChain(op LogicalOp, rules ...Filter) (*Chain, error) { + switch op { + case None, All, Any: + return &Chain{rules: rules, op: op}, nil + default: + return nil, fmt.Errorf("invalid logical operator provided: %q", op) + } +} + // Eval evaluates the filter rule sets recursively based on their operator type. func (c *Chain) Eval(filterable Filterable) (bool, error) { switch c.op { @@ -105,6 +113,17 @@ type Condition struct { value string } +// NewCondition initiates a new Condition instance from the given data. +// Returns error if invalid CompOperator is provided. +func NewCondition(column string, op CompOperator, value string) (Filter, error) { + switch op { + case Equal, UnEqual, Like, UnLike, LessThan, LessThanEqual, GreaterThan, GreaterThanEqual: + return &Condition{op: op, column: column, value: value}, nil + default: + return nil, fmt.Errorf("invalid comparison operator provided: %q", op) + } +} + // Eval evaluates this Condition based on its operator. // Returns true when the filter evaluates to true false otherwise. 
func (c *Condition) Eval(filterable Filterable) (bool, error) { diff --git a/internal/object/object_test.go b/internal/object/object_test.go index ee6f250f..1989adce 100644 --- a/internal/object/object_test.go +++ b/internal/object/object_test.go @@ -29,8 +29,8 @@ func TestFilter(t *testing.T) { {"Host", false}, {"service", false}, {"!service", true}, - {"host=*.example.com&hostgroup/database-server", true}, - {"host=*.example.com&!hostgroup/database-server", false}, + {"host~*.example.com&hostgroup/database-server", true}, + {"host~*.example.com&!hostgroup/database-server", false}, {"!service&(country=DE&hostgroup/database-server)", true}, {"!service&!(country=AT|country=CH)", true}, {"hostgroup/Nuremberg %28Germany%29", true},