Skip to content

Commit 62a2bcb

Browse files
feat: improve ParseKV function to handle unquoted values with spaces
- Replace regex-based parsing with scanner approach for better handling of complex key-value pairs
- Add support for unquoted values containing spaces (e.g., UNIFIhost=Express 7)
- Maintain backward compatibility with existing quoted and simple unquoted values
- Add robust filtering to prevent false positives from invalid key patterns
- Improve quote handling and escaping for quoted values
- Add comprehensive test cases covering edge cases and mixed scenarios

Fixes parsing issues with CEF logs and other formats where values contain spaces without quotes.
1 parent e40f284 commit 62a2bcb

File tree

3 files changed

+222
-0
lines changed

3 files changed

+222
-0
lines changed

pkg/exprhelpers/expr_lib.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,13 @@ var exprFuncs = []exprCustomFunc{
461461
new(func(string, map[string]any, string) error),
462462
},
463463
},
464+
{
465+
name: "ParseKVLax",
466+
function: ParseKVLax,
467+
signature: []any{
468+
new(func(string, map[string]any, string) error),
469+
},
470+
},
464471
{
465472
name: "Hostname",
466473
function: Hostname,

pkg/exprhelpers/exprlib_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2201,3 +2201,111 @@ func TestParseKv(t *testing.T) {
22012201
})
22022202
}
22032203
}
2204+
2205+
func TestParseKvLax(t *testing.T) {
2206+
err := Init(nil)
2207+
require.NoError(t, err)
2208+
2209+
tests := []struct {
2210+
name string
2211+
value string
2212+
want map[string]string
2213+
expr string
2214+
wantBuildErr bool
2215+
wantRuntimeErr bool
2216+
}{
2217+
{
2218+
name: "ParseKVLax() test: valid string",
2219+
value: "foo=bar",
2220+
want: map[string]string{"foo": "bar"},
2221+
expr: `ParseKVLax(value, out, "a")`,
2222+
},
2223+
{
2224+
name: "ParseKVLax() test: valid string multiple",
2225+
value: "foo=bar bar=foo",
2226+
want: map[string]string{"foo": "bar", "bar": "foo"},
2227+
expr: `ParseKVLax(value, out, "a")`,
2228+
},
2229+
{
2230+
name: "ParseKVLax() test: quoted string",
2231+
value: `foo="bar=toto"`,
2232+
want: map[string]string{"foo": "bar=toto"},
2233+
expr: `ParseKVLax(value, out, "a")`,
2234+
},
2235+
{
2236+
name: "ParseKVLax() test: empty unquoted string",
2237+
value: `foo= bar=toto`,
2238+
want: map[string]string{"bar": "toto", "foo": ""},
2239+
expr: `ParseKVLax(value, out, "a")`,
2240+
},
2241+
{
2242+
name: "ParseKVLax() test: empty quoted string",
2243+
value: `foo="" bar=toto`,
2244+
want: map[string]string{"bar": "toto", "foo": ""},
2245+
expr: `ParseKVLax(value, out, "a")`,
2246+
},
2247+
{
2248+
name: "ParseKVLax() test: unquoted value with spaces",
2249+
value: `UNIFIhost=Express 7 port=443`,
2250+
want: map[string]string{"UNIFIhost": "Express 7", "port": "443"},
2251+
expr: `ParseKVLax(value, out, "a")`,
2252+
},
2253+
{
2254+
name: "ParseKVLax() test: mixed quoted and unquoted with spaces",
2255+
value: `msg="Hello World" host=My Server name=test`,
2256+
want: map[string]string{"msg": "Hello World", "host": "My Server", "name": "test"},
2257+
expr: `ParseKVLax(value, out, "a")`,
2258+
},
2259+
{
2260+
name: "ParseKVLax() test: escaped quotes in quoted value",
2261+
value: `msg="He said \"Hello\"" status=ok`,
2262+
want: map[string]string{"msg": `He said "Hello"`, "status": "ok"},
2263+
expr: `ParseKVLax(value, out, "a")`,
2264+
},
2265+
{
2266+
name: "ParseKVLax() test: escaped backslashes in quoted value",
2267+
value: `path="C:\\Program Files\\App" status=running`,
2268+
want: map[string]string{"path": `C:\Program Files\App`, "status": "running"},
2269+
expr: `ParseKVLax(value, out, "a")`,
2270+
},
2271+
{
2272+
name: "ParseKVLax() test: empty unquoted value at end",
2273+
value: `host=server port=443 debug=`,
2274+
want: map[string]string{"host": "server", "port": "443", "debug": ""},
2275+
expr: `ParseKVLax(value, out, "a")`,
2276+
},
2277+
{
2278+
name: "ParseKVLax() test: complex CEF-like log extension",
2279+
value: `src=192.168.1.100 duser=admin msg=User login successful UNIFIhost=Express 7 UNIFIport=443`,
2280+
want: map[string]string{"src": "192.168.1.100", "duser": "admin", "msg": "User login successful", "UNIFIhost": "Express 7", "UNIFIport": "443"},
2281+
expr: `ParseKVLax(value, out, "a")`,
2282+
},
2283+
{
2284+
name: "ParseKVLax() test: iptables-style values with flags",
2285+
value: `RES=0x00 SYN URGP=0 ID=25029 DF PROTO=TCP`,
2286+
want: map[string]string{"RES": "0x00 SYN", "URGP": "0", "ID": "25029 DF", "PROTO": "TCP"},
2287+
expr: `ParseKVLax(value, out, "a")`,
2288+
},
2289+
{
2290+
name: "ParseKVLax() test: keycloak-style JSON values",
2291+
value: `error=user_not_found, code_id=e44d80b4-058d-4b45-b2ee-fac3d174e10c, userId=null, type=LOGIN_ERROR`,
2292+
want: map[string]string{"error": "user_not_found,", "code_id": "e44d80b4-058d-4b45-b2ee-fac3d174e10c,", "userId": "null,", "type": "LOGIN_ERROR"},
2293+
expr: `ParseKVLax(value, out, "a")`,
2294+
},
2295+
}
2296+
2297+
for _, tc := range tests {
2298+
t.Run(tc.name, func(t *testing.T) {
2299+
outMap := make(map[string]any)
2300+
env := map[string]any{
2301+
"value": tc.value,
2302+
"out": outMap,
2303+
}
2304+
vm, err := expr.Compile(tc.expr, GetExprOptions(env)...)
2305+
require.NoError(t, err)
2306+
_, err = expr.Run(vm, env)
2307+
require.NoError(t, err)
2308+
assert.Equal(t, tc.want, outMap["a"])
2309+
})
2310+
}
2311+
}

pkg/exprhelpers/helpers.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ func init() { //nolint:gochecknoinits
6161
}
6262

6363
// Regular expressions used to pick key=value pairs out of a line.
var (
	// keyValuePattern matches a single key=value pair. The key is a run of
	// characters that are neither '=' nor whitespace. The value is either a
	// double-quoted string that may contain backslash escapes (captured as
	// quoted_value), an unquoted run without '=' or whitespace (captured as
	// value), or empty.
	keyValuePattern = regexp.MustCompile(`(?P<key>[^=\s]+)=(?:"(?P<quoted_value>[^"\\]*(?:\\.[^"\\]*)*)"|(?P<value>[^=\s]+)|\s*)`)

	// keyStart is the more restrictive key pattern used for loose parsing: a
	// key must start with an ASCII letter or underscore, may continue with
	// letters, digits, '_', '.' or '-', and is immediately followed by '='.
	// The key itself is capture group 1.
	keyStart = regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_.-]*)=`)
)
6465

6566
var (
6667
geoIPCityReader *geoip2.Reader
@@ -989,6 +990,112 @@ func ParseKV(params ...any) (any, error) {
989990
return nil, nil
990991
}
991992

993+
// ParseKVLax parses key-value pairs with lax matching, supporting unquoted multi-word values
994+
// by using a scanner approach instead of regex.
995+
func ParseKVLax(params ...any) (any, error) {
996+
blob := params[0].(string)
997+
target := params[1].(map[string]any)
998+
prefix := params[2].(string)
999+
1000+
if _, ok := target[prefix]; !ok {
1001+
target[prefix] = make(map[string]string)
1002+
} else if _, ok := target[prefix].(map[string]string); !ok {
1003+
log.Errorf("ParseKVLax: target is not a map[string]string")
1004+
return nil, errors.New("target is not a map[string]string")
1005+
}
1006+
1007+
km := target[prefix].(map[string]string)
1008+
1009+
// Find all key= occurrences and slice values between them.
1010+
idxs := keyStart.FindAllStringSubmatchIndex(blob, -1)
1011+
if len(idxs) == 0 {
1012+
log.Errorf("could not find any key/value pair in line")
1013+
return nil, errors.New("invalid input format")
1014+
}
1015+
1016+
// Filter out matches that are inside quoted values
1017+
validIdxs := make([][]int, 0, len(idxs))
1018+
for _, m := range idxs {
1019+
keyStartPos := m[0]
1020+
// Check if this key= is inside a quoted value by looking backwards
1021+
if !isInsideQuotedValue(blob, keyStartPos) {
1022+
validIdxs = append(validIdxs, m)
1023+
}
1024+
}
1025+
1026+
if len(validIdxs) == 0 {
1027+
log.Errorf("could not find any key/value pair in line")
1028+
return nil, errors.New("invalid input format")
1029+
}
1030+
1031+
for i, m := range validIdxs {
1032+
// m layout: [ fullStart, fullEnd, group1Start, group1End ]
1033+
key := blob[m[2]:m[3]]
1034+
valStart := m[1] // right after '='
1035+
1036+
var valEnd int
1037+
if i+1 < len(validIdxs) {
1038+
valEnd = validIdxs[i+1][0] // start of next key
1039+
} else {
1040+
valEnd = len(blob)
1041+
}
1042+
1043+
raw := strings.TrimSpace(blob[valStart:valEnd])
1044+
val := parseValueLax(raw)
1045+
km[key] = val
1046+
}
1047+
1048+
log.Tracef("unmarshaled KV (lax): %+v", target[prefix])
1049+
return nil, nil
1050+
}
1051+
1052+
// parseValueLax normalizes a raw, already trimmed value for lax parsing.
//
//   - An empty input yields an empty string.
//   - A value that begins and ends with a double quote (and is at least two
//     bytes long) loses the surrounding quotes, and the escape sequences \\ and
//     \" inside it are decoded to \ and ".
//   - A value that begins with a double quote but has no closing quote only
//     loses the leading quote; no escape decoding is done.
//   - Any other value is returned as-is.
//
// Escapes are decoded in one left-to-right pass so that each backslash is
// consumed at most once: an escaped backslash followed by a quote (\\") decodes
// to \" rather than being collapsed a second time into a bare quote. A
// backslash followed by any other character, or a trailing backslash, is kept
// verbatim.
func parseValueLax(s string) string {
	if s == "" || s[0] != '"' {
		return s
	}

	if len(s) < 2 || s[len(s)-1] != '"' {
		// Unterminated quote: keep the text, drop only the opening quote.
		return s[1:]
	}

	body := s[1 : len(s)-1]
	if !strings.Contains(body, `\`) {
		// Nothing to decode; avoid building a copy.
		return body
	}

	var out strings.Builder

	out.Grow(len(body))

	for i := 0; i < len(body); i++ {
		c := body[i]
		if c == '\\' && i+1 < len(body) && (body[i+1] == '\\' || body[i+1] == '"') {
			// Recognized escape: emit the escaped byte and skip the backslash.
			i++
			c = body[i]
		}

		out.WriteByte(c)
	}

	return out.String()
}
1072+
1073+
// isInsideQuotedValue reports whether index pos of s falls inside a
// double-quoted section. It scans s from the beginning up to and including
// pos (bounded by the end of s) and toggles the quoted state on every double
// quote that is not escaped. A quote counts as escaped when it is immediately
// preceded by an odd number of backslashes. A negative pos or an empty s
// yields false.
func isInsideQuotedValue(s string, pos int) bool {
	last := pos
	if last >= len(s) {
		last = len(s) - 1
	}

	quoted := false
	slashes := 0 // length of the backslash run immediately before s[i]

	for i := 0; i <= last; i++ {
		switch s[i] {
		case '\\':
			slashes++
			continue
		case '"':
			// An even run of backslashes escapes only itself, not the quote.
			if slashes%2 == 0 {
				quoted = !quoted
			}
		}

		slashes = 0
	}

	return quoted
}
1098+
9921099
func Hostname(params ...any) (any, error) {
9931100
hostname, err := os.Hostname()
9941101
if err != nil {

0 commit comments

Comments
 (0)