diff --git a/pkg/systemlogmonitor/log_monitor.go b/pkg/systemlogmonitor/log_monitor.go index cb0cdd0b4..c5f4e4417 100644 --- a/pkg/systemlogmonitor/log_monitor.go +++ b/pkg/systemlogmonitor/log_monitor.go @@ -20,6 +20,8 @@ import ( "encoding/json" "fmt" "os" + "regexp" + "strings" "time" "k8s.io/klog/v2" @@ -157,6 +159,9 @@ func (l *logMonitor) parseLog(log *systemlogtypes.Log) { continue } status := l.generateStatus(matched, rule) + if status == nil { + continue + } klog.Infof("New status generated: %+v", status) l.output <- status } @@ -169,12 +174,31 @@ func (l *logMonitor) generateStatus(logs []*systemlogtypes.Log, rule systemlogty message := generateMessage(logs, rule.PatternGeneratedMessageSuffix) var events []types.Event var changedConditions []*types.Condition + + // Support configuring rule.Reason as a Sprintf format string and formatting it with the matched capturing groups in rule.Pattern. + re := regexp.MustCompile(rule.Pattern) + matches := re.FindStringSubmatch(message) + reason := rule.Reason + formatArgs := make([]interface{}, 0) + if len(matches) > 1 { + // Use the matched capturing groups as the arguments for Sprintf. + for _, value := range matches[1:] { + formatArgs = append(formatArgs, value) + } + } + reason = fmt.Sprintf(rule.Reason, formatArgs...) + // If fmt.Sprintf fails, it will add "%!" for each failed template in the result string. + if strings.Contains(reason, "%!") { + klog.Errorf("Got wrong string %q for reason %q with pattern %q", reason, rule.Reason, rule.Pattern) + return nil + } + if rule.Type == types.Temp { // For temporary error only generate event events = append(events, types.Event{ Severity: types.Warn, Timestamp: timestamp, - Reason: rule.Reason, + Reason: reason, Message: message, }) } else { @@ -185,19 +209,19 @@ func (l *logMonitor) generateStatus(logs []*systemlogtypes.Log, rule systemlogty // Update transition timestamp and message when the condition // changes. Condition is considered to be changed only when // status or reason changes. - if condition.Status == types.False || condition.Reason != rule.Reason { + if condition.Status == types.False || condition.Reason != reason { condition.Transition = timestamp condition.Message = message events = append(events, util.GenerateConditionChangeEvent( condition.Type, types.True, - rule.Reason, + reason, message, timestamp, )) } condition.Status = types.True - condition.Reason = rule.Reason + condition.Reason = reason changedConditions = append(changedConditions, condition) break } diff --git a/pkg/systemlogmonitor/log_monitor_test.go b/pkg/systemlogmonitor/log_monitor_test.go index 230e9c659..2a4762a7d 100644 --- a/pkg/systemlogmonitor/log_monitor_test.go +++ b/pkg/systemlogmonitor/log_monitor_test.go @@ -408,6 +408,150 @@ func TestGenerateStatusForMetrics(t *testing.T) { } } +func TestGenerateStatusForEvents(t *testing.T) { + for c, test := range []struct { + name string + rule logtypes.Rule + expected *types.Status + logs []*logtypes.Log + }{ + { + name: "without matching group", + rule: logtypes.Rule{ + Type: types.Temp, + Reason: "NvidiaGPUXid", + Pattern: "NVRM: Xid \\(PCI:[^)]+\\): \\d+,.*", + }, + logs: []*logtypes.Log{ + { + Timestamp: time.Unix(1000, 1000), + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }, + }, + expected: &types.Status{ + Source: testSource, + Events: []types.Event{{ + Severity: types.Warn, + Timestamp: time.Unix(1000, 1000), + Reason: "NvidiaGPUXid", + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }}, + }, + }, + { + name: "one matching group", + rule: logtypes.Rule{ + Type: types.Temp, + Reason: "NvidiaGPUXid%s", + Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+),.*", + }, + logs: []*logtypes.Log{ + { + Timestamp: time.Unix(1000, 1000), + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }, + }, + expected: &types.Status{ + Source: testSource, + Events: []types.Event{{ + Severity: types.Warn, + Timestamp: time.Unix(1000, 1000), + Reason: "NvidiaGPUXid45", + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }}, + }, + }, + { + name: "two matching groups", + rule: logtypes.Rule{ + Type: types.Temp, + Reason: "NvidiaGPUXid%s, Ch%s", + Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+), Ch (\\d+).*", + }, + logs: []*logtypes.Log{ + { + Timestamp: time.Unix(1000, 1000), + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }, + }, + expected: &types.Status{ + Source: testSource, + Events: []types.Event{{ + Severity: types.Warn, + Timestamp: time.Unix(1000, 1000), + Reason: "NvidiaGPUXid45, Ch00000010", + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }}, + }, + }, + { + name: "not enough matching groups 1", + rule: logtypes.Rule{ + Type: types.Temp, + Reason: "NvidiaGPUXid%s, Ch%s", + Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+),.*", + }, + logs: []*logtypes.Log{ + { + Timestamp: time.Unix(1000, 1000), + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }, + }, + expected: nil, + }, + { + name: "not enough matching groups 2", + rule: logtypes.Rule{ + Type: types.Temp, + Reason: "NvidiaGPUXid%s", + Pattern: "NVRM: Xid \\(PCI:[^)]+\\): \\d+,.*", + }, + logs: []*logtypes.Log{ + { + Timestamp: time.Unix(1000, 1000), + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }, + }, + expected: nil, + }, + { + name: "indexed matching groups", + rule: logtypes.Rule{ + Type: types.Temp, + Reason: "NvidiaGPUXid%[1]s, Ch%[2]s", + Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+), Ch (\\d+).*", + }, + logs: []*logtypes.Log{ + { + Timestamp: time.Unix(1000, 1000), + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }, + }, + expected: &types.Status{ + Source: testSource, + Events: []types.Event{{ + Severity: types.Warn, + Timestamp: time.Unix(1000, 1000), + Reason: "NvidiaGPUXid45, Ch00000010", + Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010", + }}, + }, + }, + } { + l := &logMonitor{ + config: MonitorConfig{ + Source: testSource, + }, + } + (&l.config).ApplyDefaultConfiguration() + got := l.generateStatus(test.logs, test.rule) + + if !reflect.DeepEqual(test.expected, got) { + t.Errorf("case %d %s: expected status %+v, got %+v", c+1, test.name, test.expected, got) + } + } +} + func TestInitializeProblemMetricsOrDie(t *testing.T) { testCases := []struct { name string