diff --git a/pkg/exprhelpers/expr_lib.go b/pkg/exprhelpers/expr_lib.go index 40ec27d6af2..3801f5fbaae 100644 --- a/pkg/exprhelpers/expr_lib.go +++ b/pkg/exprhelpers/expr_lib.go @@ -461,6 +461,13 @@ var exprFuncs = []exprCustomFunc{ new(func(string, map[string]any, string) error), }, }, + { + name: "ParseKVLax", + function: ParseKVLax, + signature: []any{ + new(func(string, map[string]any, string) error), + }, + }, { name: "Hostname", function: Hostname, diff --git a/pkg/exprhelpers/exprlib_test.go b/pkg/exprhelpers/exprlib_test.go index cdcf440074d..0bf779a34c0 100644 --- a/pkg/exprhelpers/exprlib_test.go +++ b/pkg/exprhelpers/exprlib_test.go @@ -2201,3 +2201,111 @@ func TestParseKv(t *testing.T) { }) } } + +func TestParseKvLax(t *testing.T) { + err := Init(nil) + require.NoError(t, err) + + tests := []struct { + name string + value string + want map[string]string + expr string + wantBuildErr bool + wantRuntimeErr bool + }{ + { + name: "ParseKVLax() test: valid string", + value: "foo=bar", + want: map[string]string{"foo": "bar"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: valid string multiple", + value: "foo=bar bar=foo", + want: map[string]string{"foo": "bar", "bar": "foo"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: quoted string", + value: `foo="bar=toto"`, + want: map[string]string{"foo": "bar=toto"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: empty unquoted string", + value: `foo= bar=toto`, + want: map[string]string{"bar": "toto", "foo": ""}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: empty quoted string", + value: `foo="" bar=toto`, + want: map[string]string{"bar": "toto", "foo": ""}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: unquoted value with spaces", + value: `UNIFIhost=Express 7 port=443`, + want: map[string]string{"UNIFIhost": "Express 7", "port": "443"}, + expr: `ParseKVLax(value, out, 
"a")`, + }, + { + name: "ParseKVLax() test: mixed quoted and unquoted with spaces", + value: `msg="Hello World" host=My Server name=test`, + want: map[string]string{"msg": "Hello World", "host": "My Server", "name": "test"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: escaped quotes in quoted value", + value: `msg="He said \"Hello\"" status=ok`, + want: map[string]string{"msg": `He said "Hello"`, "status": "ok"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: escaped backslashes in quoted value", + value: `path="C:\\Program Files\\App" status=running`, + want: map[string]string{"path": `C:\Program Files\App`, "status": "running"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: empty unquoted value at end", + value: `host=server port=443 debug=`, + want: map[string]string{"host": "server", "port": "443", "debug": ""}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: complex CEF-like log extension", + value: `src=192.168.1.100 duser=admin msg=User login successful UNIFIhost=Express 7 UNIFIport=443`, + want: map[string]string{"src": "192.168.1.100", "duser": "admin", "msg": "User login successful", "UNIFIhost": "Express 7", "UNIFIport": "443"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: iptables-style values with flags", + value: `RES=0x00 SYN URGP=0 ID=25029 DF PROTO=TCP`, + want: map[string]string{"RES": "0x00 SYN", "URGP": "0", "ID": "25029 DF", "PROTO": "TCP"}, + expr: `ParseKVLax(value, out, "a")`, + }, + { + name: "ParseKVLax() test: keycloak-style JSON values", + value: `error=user_not_found, code_id=e44d80b4-058d-4b45-b2ee-fac3d174e10c, userId=null, type=LOGIN_ERROR`, + want: map[string]string{"error": "user_not_found,", "code_id": "e44d80b4-058d-4b45-b2ee-fac3d174e10c,", "userId": "null,", "type": "LOGIN_ERROR"}, + expr: `ParseKVLax(value, out, "a")`, + }, + } + + for _, tc := range tests { + t.Run(tc.name, 
func(t *testing.T) { + outMap := make(map[string]any) + env := map[string]any{ + "value": tc.value, + "out": outMap, + } + vm, err := expr.Compile(tc.expr, GetExprOptions(env)...) + require.NoError(t, err) + _, err = expr.Run(vm, env) + require.NoError(t, err) + assert.Equal(t, tc.want, outMap["a"]) + }) + } +} diff --git a/pkg/exprhelpers/helpers.go b/pkg/exprhelpers/helpers.go index 62653e39185..1fa54766750 100644 --- a/pkg/exprhelpers/helpers.go +++ b/pkg/exprhelpers/helpers.go @@ -61,6 +61,7 @@ func init() { //nolint:gochecknoinits } var keyValuePattern = regexp.MustCompile(`(?P[^=\s]+)=(?:"(?P[^"\\]*(?:\\.[^"\\]*)*)"|(?P[^=\s]+)|\s*)`) +var keyStart = regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_.-]*)=`) // More restrictive key pattern for loose parsing var ( geoIPCityReader *geoip2.Reader @@ -989,6 +990,113 @@ func ParseKV(params ...any) (any, error) { return nil, nil } +// ParseKVLax parses key-value pairs with lax matching, supporting unquoted multi-word values +// by using a scanner approach instead of regex. +func ParseKVLax(params ...any) (any, error) { + blob := params[0].(string) + target := params[1].(map[string]any) + prefix := params[2].(string) + + if _, ok := target[prefix]; !ok { + target[prefix] = make(map[string]string) + } else if _, ok := target[prefix].(map[string]string); !ok { + log.Errorf("ParseKVLax: target is not a map[string]string") + return nil, errors.New("target is not a map[string]string") + } + + km := target[prefix].(map[string]string) + + // Find all key= occurrences and slice values between them. 
+ idxs := keyStart.FindAllStringSubmatchIndex(blob, -1) + if len(idxs) == 0 { + log.Errorf("could not find any key/value pair in line") + return nil, errors.New("invalid input format") + } + + // Filter out matches that are inside quoted values + validIdxs := make([][]int, 0, len(idxs)) + for _, m := range idxs { + keyStartPos := m[0] + // Check if this key= is inside a quoted value by looking backwards + if !isInsideQuotedValue(blob, keyStartPos) { + validIdxs = append(validIdxs, m) + } + } + + if len(validIdxs) == 0 { + log.Errorf("could not find any key/value pair in line") + return nil, errors.New("invalid input format") + } + + for i, m := range validIdxs { + // m layout: [ fullStart, fullEnd, group1Start, group1End ] + key := blob[m[2]:m[3]] + valStart := m[1] // right after '=' + + var valEnd int + if i+1 < len(validIdxs) { + valEnd = validIdxs[i+1][0] // start of next key + } else { + valEnd = len(blob) + } + + raw := strings.TrimSpace(blob[valStart:valEnd]) + val := parseValueLax(raw) + km[key] = val + } + + log.Tracef("unmarshaled KV (lax): %+v", target[prefix]) + return nil, nil +} + +// parseValueLax handles quoted and unquoted values for lax parsing. +// - If it begins with a quote, it removes the surrounding quotes +// if the closing one is present and unescapes \" and \\. 
// laxUnescaper turns the escape sequences \\ and \" into \ and " in a single
// left-to-right pass, so the backslash produced by an escaped backslash is
// never re-read as the start of another escape sequence.
var laxUnescaper = strings.NewReplacer(`\\`, `\`, `\"`, `"`)

// parseValueLax handles quoted and unquoted values for lax parsing.
//   - An empty or unquoted value is returned as-is.
//   - A value that begins and ends with a double quote has the surrounding
//     quotes removed and \" and \\ unescaped.
//   - A value that begins with a double quote but does not end with one only
//     loses the leading quote; no unescaping is performed.
func parseValueLax(s string) string {
	if s == "" || s[0] != '"' {
		return s
	}

	if len(s) < 2 || s[len(s)-1] != '"' {
		// Opening quote without a closing one.
		return s[1:]
	}

	return laxUnescaper.Replace(s[1 : len(s)-1])
}

// isInsideQuotedValue reports whether the byte at index pos of s lies inside a
// double-quoted section. It walks s from the start up to and including pos
// (bounded by the length of s) and toggles the quote state on every double
// quote that is not escaped, i.e. that is preceded by an even number of
// consecutive backslashes. A negative pos yields false.
func isInsideQuotedValue(s string, pos int) bool {
	inQuote := false
	backslashes := 0 // length of the backslash run immediately before s[i]

	for i := 0; i <= pos && i < len(s); i++ {
		switch s[i] {
		case '\\':
			backslashes++
			continue
		case '"':
			// An odd run of backslashes escapes the quote.
			if backslashes%2 == 0 {
				inQuote = !inQuote
			}
		}

		backslashes = 0
	}

	return inQuote
}