Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkg/exprhelpers/expr_lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,13 @@ var exprFuncs = []exprCustomFunc{
new(func(string, map[string]any, string) error),
},
},
{
name: "ParseKVLax",
function: ParseKVLax,
signature: []any{
new(func(string, map[string]any, string) error),
},
},
{
name: "Hostname",
function: Hostname,
Expand Down
108 changes: 108 additions & 0 deletions pkg/exprhelpers/exprlib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2201,3 +2201,111 @@ func TestParseKv(t *testing.T) {
})
}
}

// TestParseKvLax exercises the ParseKVLax expr helper through a compiled
// expression, the same way it is invoked at runtime.
//
// Each case provides the raw input (value), the expression to evaluate (expr)
// and the map expected under key "a" of the output map (want). Cases that are
// expected to fail set wantBuildErr (compilation must fail) or wantRuntimeErr
// (evaluation must fail); for those, want is not checked.
func TestParseKvLax(t *testing.T) {
	err := Init(nil)
	require.NoError(t, err)

	tests := []struct {
		name           string
		value          string
		want           map[string]string
		expr           string
		wantBuildErr   bool
		wantRuntimeErr bool
	}{
		{
			name:  "ParseKVLax() test: valid string",
			value: "foo=bar",
			want:  map[string]string{"foo": "bar"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: valid string multiple",
			value: "foo=bar bar=foo",
			want:  map[string]string{"foo": "bar", "bar": "foo"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: quoted string",
			value: `foo="bar=toto"`,
			want:  map[string]string{"foo": "bar=toto"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: empty unquoted string",
			value: `foo= bar=toto`,
			want:  map[string]string{"bar": "toto", "foo": ""},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: empty quoted string",
			value: `foo="" bar=toto`,
			want:  map[string]string{"bar": "toto", "foo": ""},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: unquoted value with spaces",
			value: `UNIFIhost=Express 7 port=443`,
			want:  map[string]string{"UNIFIhost": "Express 7", "port": "443"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: mixed quoted and unquoted with spaces",
			value: `msg="Hello World" host=My Server name=test`,
			want:  map[string]string{"msg": "Hello World", "host": "My Server", "name": "test"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: escaped quotes in quoted value",
			value: `msg="He said \"Hello\"" status=ok`,
			want:  map[string]string{"msg": `He said "Hello"`, "status": "ok"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: escaped backslashes in quoted value",
			value: `path="C:\\Program Files\\App" status=running`,
			want:  map[string]string{"path": `C:\Program Files\App`, "status": "running"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: empty unquoted value at end",
			value: `host=server port=443 debug=`,
			want:  map[string]string{"host": "server", "port": "443", "debug": ""},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: complex CEF-like log extension",
			value: `src=192.168.1.100 duser=admin msg=User login successful UNIFIhost=Express 7 UNIFIport=443`,
			want:  map[string]string{"src": "192.168.1.100", "duser": "admin", "msg": "User login successful", "UNIFIhost": "Express 7", "UNIFIport": "443"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: iptables-style values with flags",
			value: `RES=0x00 SYN URGP=0 ID=25029 DF PROTO=TCP`,
			want:  map[string]string{"RES": "0x00 SYN", "URGP": "0", "ID": "25029 DF", "PROTO": "TCP"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			name:  "ParseKVLax() test: keycloak-style JSON values",
			value: `error=user_not_found, code_id=e44d80b4-058d-4b45-b2ee-fac3d174e10c, userId=null, type=LOGIN_ERROR`,
			want:  map[string]string{"error": "user_not_found,", "code_id": "e44d80b4-058d-4b45-b2ee-fac3d174e10c,", "userId": "null,", "type": "LOGIN_ERROR"},
			expr:  `ParseKVLax(value, out, "a")`,
		},
		{
			// No "key=" anywhere in the input: the helper reports an error
			// instead of producing an empty result.
			name:           "ParseKVLax() test: no key/value pair",
			value:          "no separators here",
			expr:           `ParseKVLax(value, out, "a")`,
			wantRuntimeErr: true,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			outMap := make(map[string]any)
			env := map[string]any{
				"value": tc.value,
				"out":   outMap,
			}

			vm, err := expr.Compile(tc.expr, GetExprOptions(env)...)
			if tc.wantBuildErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)

			_, err = expr.Run(vm, env)
			if tc.wantRuntimeErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tc.want, outMap["a"])
		})
	}
}
108 changes: 108 additions & 0 deletions pkg/exprhelpers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func init() { //nolint:gochecknoinits
}

// keyValuePattern matches a single key=value pair. The key is a run of
// characters other than '=' and whitespace. The value is one of: a
// double-quoted string that may contain backslash escapes (group
// "quoted_value"), an unquoted run without '=' or whitespace (group "value"),
// or nothing but optional whitespace (empty value).
// NOTE(review): presumably consumed by ParseKV; its use is not visible in this hunk.
var keyValuePattern = regexp.MustCompile(`(?P<key>[^=\s]+)=(?:"(?P<quoted_value>[^"\\]*(?:\\.[^"\\]*)*)"|(?P<value>[^=\s]+)|\s*)`)
// keyStart locates the beginning of a pair for ParseKVLax: a key made of a
// letter or underscore followed by letters, digits, '_', '.' or '-', then '='.
// Group 1 captures the key without the '='. The pattern is unanchored, so it
// can match in the middle of a longer token.
var keyStart = regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_.-]*)=`) // More restrictive key pattern for loose parsing

var (
geoIPCityReader *geoip2.Reader
Expand Down Expand Up @@ -989,6 +990,113 @@ func ParseKV(params ...any) (any, error) {
return nil, nil
}

// ParseKVLax parses key=value pairs leniently, allowing unquoted values that
// contain spaces (e.g. `host=My Server port=443`).
//
// Every occurrence of the keyStart pattern that is not located inside a
// double-quoted section marks the start of a pair; the value of a pair is
// whatever lies between its '=' and the start of the next key (or the end of
// the input), trimmed of surrounding whitespace and unquoted by parseValueLax.
//
// Parameters (positional, as passed by the expr runtime):
//   - params[0] string: the text to parse
//   - params[1] map[string]any: the destination map
//   - params[2] string: the key of the destination map under which the
//     resulting map[string]string is stored (created when absent)
//
// It always returns a nil value. An error is returned when the destination
// entry exists but is not a map[string]string, or when no key=value pair is
// found in the input.
func ParseKVLax(params ...any) (any, error) {
	blob := params[0].(string)
	target := params[1].(map[string]any)
	prefix := params[2].(string)

	// Resolve the result map, creating it on first use.
	var km map[string]string
	if existing, found := target[prefix]; !found {
		km = make(map[string]string)
		target[prefix] = km
	} else if typed, isStringMap := existing.(map[string]string); isStringMap {
		km = typed
	} else {
		log.Errorf("ParseKVLax: target is not a map[string]string")
		return nil, errors.New("target is not a map[string]string")
	}

	// Each match is laid out as [fullStart, fullEnd, keyStart, keyEnd].
	// Keep only the matches that are not part of a quoted value.
	var keys [][]int
	for _, m := range keyStart.FindAllStringSubmatchIndex(blob, -1) {
		if isInsideQuotedValue(blob, m[0]) {
			continue
		}
		keys = append(keys, m)
	}

	if len(keys) == 0 {
		log.Errorf("could not find any key/value pair in line")
		return nil, errors.New("invalid input format")
	}

	for i, m := range keys {
		// The value runs from just after '=' up to the next key, or to the
		// end of the input for the last pair.
		end := len(blob)
		if next := i + 1; next < len(keys) {
			end = keys[next][0]
		}
		km[blob[m[2]:m[3]]] = parseValueLax(strings.TrimSpace(blob[m[1]:end]))
	}

	log.Tracef("unmarshaled KV (lax): %+v", target[prefix])

	return nil, nil
}

// parseValueLax normalizes a single, already trimmed value for lax parsing.
//
//   - An empty or unquoted value is returned unchanged.
//   - A value that starts and ends with a double quote has both quotes removed
//     and the escape sequences \" and \\ replaced by " and \ respectively.
//     Any other backslash sequence is kept verbatim.
//   - A value that starts with a double quote but does not end with one only
//     loses the leading quote.
//
// Unescaping is done in a single left-to-right pass so that the backslash
// produced by an escaped backslash is never reinterpreted as the start of
// another escape sequence.
func parseValueLax(s string) string {
	if s == "" || s[0] != '"' {
		return s
	}

	if len(s) < 2 || s[len(s)-1] != '"' {
		// Opening quote without a closing one: drop the opening quote only.
		return s[1:]
	}

	body := s[1 : len(s)-1]
	if !strings.Contains(body, `\`) {
		// Nothing to unescape; avoid building a new string.
		return body
	}

	var b strings.Builder
	b.Grow(len(body))

	for i := 0; i < len(body); i++ {
		// On \\ or \", skip the escaping backslash and emit the escaped byte.
		if body[i] == '\\' && i+1 < len(body) && (body[i+1] == '\\' || body[i+1] == '"') {
			i++
		}
		b.WriteByte(body[i])
	}

	return b.String()
}

// isInsideQuotedValue reports whether the byte at index pos of s lies within
// a double-quoted section.
//
// The string is scanned from its start up to and including pos (bounded by
// the end of s); every double quote preceded by an even number of consecutive
// backslashes toggles the quoted state, while a quote preceded by an odd
// number of backslashes is considered escaped and ignored. A negative pos
// yields false.
func isInsideQuotedValue(s string, pos int) bool {
	// Scan the bytes [0, pos], clamped to the length of s.
	end := len(s)
	if pos < end {
		end = pos + 1
	}

	quoted := false
	backslashes := 0 // length of the backslash run immediately before s[i]

	for i := 0; i < end; i++ {
		switch s[i] {
		case '\\':
			backslashes++
		case '"':
			if backslashes%2 == 0 {
				quoted = !quoted
			}
			backslashes = 0
		default:
			backslashes = 0
		}
	}

	return quoted
}

func Hostname(params ...any) (any, error) {
hostname, err := os.Hostname()
if err != nil {
Expand Down