Skip to content

Commit 4303a62

Browse files
feat: improve ParseKV function to handle unquoted values with spaces
- Replace regex-based parsing with a scanner approach for better handling of complex key-value pairs
- Add support for unquoted values containing spaces (e.g., UNIFIhost=Express 7)
- Maintain backward compatibility with existing quoted and simple unquoted values
- Add robust filtering to prevent false positives from invalid key patterns
- Improve quote handling and escaping for quoted values
- Add comprehensive test cases covering edge cases and mixed scenarios

Fixes parsing issues with CEF logs and other formats where values contain spaces without quotes.
1 parent e40f284 commit 4303a62

File tree

2 files changed

+130
-24
lines changed

2 files changed

+130
-24
lines changed

pkg/exprhelpers/exprlib_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,6 +2184,48 @@ func TestParseKv(t *testing.T) {
21842184
want: map[string]string{"bar": "toto", "foo": ""},
21852185
expr: `ParseKV(value, out, "a")`,
21862186
},
2187+
{
2188+
name: "ParseKV() test: unquoted value with spaces",
2189+
value: `UNIFIhost=Express 7 port=443`,
2190+
want: map[string]string{"UNIFIhost": "Express 7", "port": "443"},
2191+
expr: `ParseKV(value, out, "a")`,
2192+
},
2193+
{
2194+
name: "ParseKV() test: mixed quoted and unquoted with spaces",
2195+
value: `msg="Hello World" host=My Server name=test`,
2196+
want: map[string]string{"msg": "Hello World", "host": "My Server", "name": "test"},
2197+
expr: `ParseKV(value, out, "a")`,
2198+
},
2199+
{
2200+
name: "ParseKV() test: escaped quotes in quoted value",
2201+
value: `msg="He said \"Hello\"" status=ok`,
2202+
want: map[string]string{"msg": `He said "Hello"`, "status": "ok"},
2203+
expr: `ParseKV(value, out, "a")`,
2204+
},
2205+
{
2206+
name: "ParseKV() test: escaped backslashes in quoted value",
2207+
value: `path="C:\\Program Files\\App" status=running`,
2208+
want: map[string]string{"path": `C:\Program Files\App`, "status": "running"},
2209+
expr: `ParseKV(value, out, "a")`,
2210+
},
2211+
{
2212+
name: "ParseKV() test: empty unquoted value at end",
2213+
value: `host=server port=443 debug=`,
2214+
want: map[string]string{"host": "server", "port": "443", "debug": ""},
2215+
expr: `ParseKV(value, out, "a")`,
2216+
},
2217+
{
2218+
name: "ParseKV() test: complex CEF-like log",
2219+
value: `CEF:0|Ubiquiti|UniFi|7.4.162|login|User Login|3|src=192.168.1.100 duser=admin msg=User login successful UNIFIhost=Express 7 UNIFIport=443`,
2220+
want: map[string]string{"src": "192.168.1.100", "duser": "admin", "msg": "User login successful", "UNIFIhost": "Express 7", "UNIFIport": "443"},
2221+
expr: `ParseKV(value, out, "a")`,
2222+
},
2223+
{
2224+
name: "ParseKV() test: robust parsing with mixed valid and invalid patterns",
2225+
value: `valid_key=value1 some_text_without_equals another-valid.key=value2 _valid_key=value3`,
2226+
want: map[string]string{"valid_key": "value1 some_text_without_equals", "another-valid.key": "value2", "_valid_key": "value3"},
2227+
expr: `ParseKV(value, out, "a")`,
2228+
},
21872229
}
21882230

21892231
for _, tc := range tests {

pkg/exprhelpers/helpers.go

Lines changed: 88 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func init() { //nolint:gochecknoinits
6060
}
6161
}
6262

63-
var keyValuePattern = regexp.MustCompile(`(?P<key>[^=\s]+)=(?:"(?P<quoted_value>[^"\\]*(?:\\.[^"\\]*)*)"|(?P<value>[^=\s]+)|\s*)`)
63+
// keyStart matches a candidate "key=" token: a key that begins with an ASCII
// letter or underscore, continues with letters, digits, '_', '.' or '-', and is
// immediately followed by '='. Capture group 1 is the key without the '='.
// The pattern is unanchored, so it can also match the tail of a longer token.
var keyStart = regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_.-]*)=`) // More restrictive key pattern
6464

6565
var (
6666
geoIPCityReader *geoip2.Reader
@@ -950,45 +950,109 @@ func ParseKV(params ...any) (any, error) {
950950
target := params[1].(map[string]any)
951951
prefix := params[2].(string)
952952

953-
matches := keyValuePattern.FindAllStringSubmatch(blob, -1)
954-
if matches == nil {
953+
if _, ok := target[prefix]; !ok {
954+
target[prefix] = make(map[string]string)
955+
} else if _, ok := target[prefix].(map[string]string); !ok {
956+
log.Errorf("ParseKV: target is not a map[string]string")
957+
return nil, errors.New("target is not a map[string]string")
958+
}
959+
960+
km := target[prefix].(map[string]string)
961+
962+
// Find all key= occurrences and slice values between them.
963+
idxs := keyStart.FindAllStringSubmatchIndex(blob, -1)
964+
if len(idxs) == 0 {
955965
log.Errorf("could not find any key/value pair in line")
956966
return nil, errors.New("invalid input format")
957967
}
958968

959-
if _, ok := target[prefix]; !ok {
960-
target[prefix] = make(map[string]string)
961-
} else {
962-
_, ok := target[prefix].(map[string]string)
963-
if !ok {
964-
log.Errorf("ParseKV: target is not a map[string]string")
965-
return nil, errors.New("target is not a map[string]string")
969+
// Filter out matches that are inside quoted values
970+
validIdxs := make([][]int, 0, len(idxs))
971+
for _, m := range idxs {
972+
keyStart := m[0]
973+
// Check if this key= is inside a quoted value by looking backwards
974+
if !isInsideQuotedValue(blob, keyStart) {
975+
validIdxs = append(validIdxs, m)
966976
}
967977
}
968978

969-
for _, match := range matches {
970-
key := ""
971-
value := ""
979+
if len(validIdxs) == 0 {
980+
log.Errorf("could not find any key/value pair in line")
981+
return nil, errors.New("invalid input format")
982+
}
983+
984+
for i, m := range validIdxs {
985+
// m layout: [ fullStart, fullEnd, group1Start, group1End ]
986+
key := blob[m[2]:m[3]]
987+
valStart := m[1] // right after '='
972988

973-
for i, name := range keyValuePattern.SubexpNames() {
974-
switch {
975-
case name == "key":
976-
key = match[i]
977-
case name == "quoted_value" && match[i] != "":
978-
value = match[i]
979-
case name == "value" && match[i] != "":
980-
value = match[i]
981-
}
989+
var valEnd int
990+
if i+1 < len(validIdxs) {
991+
valEnd = validIdxs[i+1][0] // start of next key
992+
} else {
993+
valEnd = len(blob)
982994
}
983995

984-
target[prefix].(map[string]string)[key] = value
996+
raw := strings.TrimSpace(blob[valStart:valEnd])
997+
val := parseValue(raw)
998+
km[key] = val
985999
}
9861000

9871001
log.Tracef("unmarshaled KV: %+v", target[prefix])
988-
9891002
return nil, nil
9901003
}
9911004

1005+
// parseValue normalizes a raw value that has already been whitespace-trimmed.
//
//   - An empty input yields "".
//   - If s starts with a double quote and also ends with one (length >= 2),
//     the surrounding quotes are removed and the escape sequences \" and \\
//     are decoded. Any other backslash sequence is kept verbatim.
//   - If s starts with a double quote but does not end with one, only the
//     leading quote is stripped.
//   - Otherwise s is returned unchanged.
func parseValue(s string) string {
	if s == "" || s[0] != '"' {
		return s
	}

	if len(s) < 2 || s[len(s)-1] != '"' {
		// No closing quote in the slice; fall back to stripping the leading quote only.
		return strings.TrimPrefix(s, `"`)
	}

	body := s[1 : len(s)-1]

	// Fast path: nothing to unescape, so avoid allocating a new string.
	if !strings.Contains(body, `\`) {
		return body
	}

	// Decode escapes in a single left-to-right pass. Running two successive
	// global replacements (\\ -> \ and then \" -> ") would let the backslash
	// produced by the first replacement pair up with a following quote and be
	// consumed by the second one, silently dropping a literal backslash.
	var b strings.Builder
	b.Grow(len(body))

	for i := 0; i < len(body); i++ {
		c := body[i]
		if c == '\\' && i+1 < len(body) && (body[i+1] == '\\' || body[i+1] == '"') {
			// Skip the escaping backslash and emit the escaped byte itself.
			i++
			c = body[i]
		}

		b.WriteByte(c)
	}

	return b.String()
}
1029+
1030+
// isInsideQuotedValue reports whether the byte at offset pos in s falls within
// an open double-quoted region. It scans s from the beginning up to and
// including pos (clamped to the end of s) and toggles the state on every double
// quote that is preceded by an even number of consecutive backslashes; a quote
// preceded by an odd number is treated as escaped and ignored.
func isInsideQuotedValue(s string, pos int) bool {
	// Exclusive upper bound of the scan: pos+1, but never past the end of s.
	limit := len(s)
	if pos < limit-1 {
		limit = pos + 1
	}

	open := false
	slashRun := 0 // consecutive backslashes immediately before the current byte

	for i := 0; i < limit; i++ {
		switch s[i] {
		case '\\':
			slashRun++
			continue
		case '"':
			// An even run of backslashes means the quote itself is not escaped.
			if slashRun%2 == 0 {
				open = !open
			}
		}

		slashRun = 0
	}

	return open
}
1055+
9921056
func Hostname(params ...any) (any, error) {
9931057
hostname, err := os.Hostname()
9941058
if err != nil {

0 commit comments

Comments
 (0)