Skip to content

Commit 95fc9c3

Browse files
committed
v1.3.2: add run+ env fields, generate SQL info
1 parent dc0cced commit 95fc9c3

File tree

1 file changed

+133
-20
lines changed

1 file changed

+133
-20
lines changed

src/o2-infologger-alert

Lines changed: 133 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# - extra rules
1414
# v1.3.1 01/10/2025 - adding detector field in alert message
1515
# - extra rules
16+
# v1.3.2 16/10/2025 - adding run + environment field in alert message
17+
# - generate SQL query for each alert
1618

1719
set cfg(TelegrafSocket) "/tmp/telegraf.sock"
1820
set cfg(TelegrafBucket) "InfologgerAlerts"
@@ -64,6 +66,7 @@ proc doLogIlg {errcode msg} {
6466
return
6567
}
6668

69+
logSetFieldsForId $errcode
6770
$logContext setField "ErrorCode" "$errcode"
6871
$logContext setField "Detector" "[getDetectorFromAlertId $errcode]"
6972
$logHandle log $logContext "$msg"
@@ -82,6 +85,7 @@ set defaultLevel 11
8285
set defaultSeverity "I"
8386
proc logResetFields {} {
8487
global logHandle
88+
if {$logHandle == ""} {return}
8589
global logContext
8690
global defaultLevel
8791
global defaultSeverity
@@ -91,6 +95,22 @@ proc logResetFields {} {
9195
$logContext setField "Level" "$defaultLevel"
9296
$logContext setField "Severity" "$defaultSeverity"
9397
$logContext setField "ErrorCode" ""
98+
$logContext setField "Run" ""
99+
$logContext setField "Partition" ""
100+
$logContext setField "Detector" ""
101+
}
102+
103+
# Set the "Run" and "Partition" fields of the shared log context from the
# values recorded in alFieldsFirst for alarm $id (a 3-element list:
# run, partition, detector - stored when the alarm first fires).
#
# Arguments:
#   id - alarm identifier; an empty id leaves the context untouched.
#
# Does nothing when logging is disabled (empty logHandle).
# When no complete field list is recorded for $id (unknown id, or alarm
# cleared), the two fields are blanked so that values left over from a
# previously logged alarm are not attached to this one.
proc logSetFieldsForId {id} {
  global logHandle logContext alFieldsFirst

  if {$logHandle eq ""} {return}
  if {$id eq ""} {return}

  set run ""
  set partition ""
  # Reading a missing array element raises an error: check existence first.
  if {[info exists alFieldsFirst($id)]} {
    set venv $alFieldsFirst($id)
    if {[llength $venv] == 3} {
      set run [lindex $venv 0]
      set partition [lindex $venv 1]
    }
  }

  $logContext setField "Run" "$run"
  $logContext setField "Partition" "$partition"
}
95115

96116
if {[catch {
@@ -504,15 +524,17 @@ if {0} {
504524
set tLast($id) $tval
505525
#puts "$id match"
506526

507-
global alTlast alState alTimeout alCount
527+
global alTlast alState alTimeout alCount alFieldsFirst
508528
if (!$alState($id)) {
509529
doLog "*** Alarm $id firing"
530+
set alFieldsFirst($id) [list "$v_run" "$v_partition" "$v_detector"]
510531
doLogIlg $id "Alarm $id triggered by message at $lastt_str_t on $v_hostname: $m"
511532
} else {
512533
doLog "*** Alarm $id still active"
513534
}
514535
set alTlast($id) [clock seconds]
515536
set alState($id) 1
537+
incr alCount($id)
516538
}
517539
}
518540
}
@@ -631,7 +653,23 @@ proc doOnline {} {
631653

632654
set alarms {}
633655

656+
# Reset every per-alarm state entry of alarm $id to its initial value.
proc clearAlarm {id} {
  # state array -> initial value
  #   alTlast       : time the condition was last detected (-1 = never)
  #   alState       : current state of the alarm (0 = inactive)
  #   alTimeout     : timeout before resetting the alarm
  #   alCount       : number of messages matched
  #   alSent        : reset to 0 together with the alarm state
  #   alFieldsFirst : ILG message fields of the message triggering the alert
  set initialValues [list \
    alTlast       -1 \
    alState       0 \
    alTimeout     30 \
    alCount       0 \
    alSent        0 \
    alFieldsFirst {} \
  ]

  foreach {arrayName initial} $initialValues {
    global $arrayName
    set ${arrayName}($id) $initial
  }
}
635673

636674
proc registerAlarm {id description doc test example} {
637675
global alarms
@@ -650,21 +688,8 @@ proc registerAlarm {id description doc test example} {
650688

651689

652690
lappend alarms $id $doc $description $test $onN $onT $offN $offT $example
653-
set tFirst($id) -1
654-
set tLast($id) -1
655-
set tN($id) 0
656-
657-
# time of last time the condition was detected
658-
# current state of alarm
659-
# timeout before resetting alarm
660-
# count number of messages matched
661-
662-
global alTlast alState alTimeout alCount alSent
663-
set alTlast($id) -1
664-
set alState($id) 0
665-
set alTimeout($id) 30
666-
set alCount($id) 0
667-
set alSent($id) 0
691+
692+
clearAlarm $id
668693
}
669694

670695

@@ -896,6 +921,85 @@ registerAlarm \
896921
####################################################################
897922

898923

924+
# function to convert the TCL logical definition to the corresponding SQL query
925+
# Split $text on every occurrence of the multi-character operator $op.
# Tcl's [split] treats its second argument as a SET of single characters,
# so [split $text "&&"] would also cut at any lone "&"; the operator is
# therefore mapped to a single sentinel character first.
proc sqlSplitOnOperator {text op} {
  return [split [string map [list $op \u0001] $text] \u0001]
}

# Remove one pair of enclosing parentheses from $text, if present.
proc sqlStripOuterParens {text} {
  if {[string length $text] >= 2 &&
      [string index $text 0] eq "(" &&
      [string index $text end] eq ")"} {
    set text [string trim [string range $text 1 end-1]]
  }
  return $text
}

# Turn a [string match] glob pattern into the body of a SQL LIKE literal:
# "*" becomes "%", "?" becomes "_", and single quotes are doubled so the
# result can be placed between '...'.
# Glob character classes and backslash escapes are not translated.
proc sqlLikeFromGlob {pattern} {
  return [string map [list * % ? _ ' ''] $pattern]
}

# Convert the Tcl logical definition of an alarm into the corresponding
# SQL WHERE clause.
#
# Arguments:
#   condition_expr - source text of the Tcl test: sub-conditions joined by
#                    "&&", each being either
#                      [string match "glob" "$field(Message)"]
#                      "$field(Name)" == "text"
#                      "$field(Name)" == number
#                    or a "||" group of string matches on Message.
#
# Returns the string "WHERE <cond> AND <cond> ...". Sub-conditions that are
# not recognised are reported on stdout and left out of the clause.
proc generateSQLFromCondition {condition_expr} {
  set sql_conditions {}

  foreach part [sqlSplitOnOperator $condition_expr "&&"] {
    set part [string trim $part]
    if {$part eq ""} { continue }
    set part [sqlStripOuterParens $part]

    if {[string first "||" $part] != -1} {
      # OR group: only string matches on Message are supported
      set or_clauses {}
      foreach or_part [sqlSplitOnOperator $part "||"] {
        set or_part [string trim $or_part]
        if {$or_part eq ""} { continue }
        set or_part [sqlStripOuterParens $or_part]

        if {[regexp {\[string match "(.*)" "\$field\(Message\)"} $or_part -> pattern]} {
          lappend or_clauses "Message LIKE '[sqlLikeFromGlob $pattern]'"
        } else {
          puts "Warning: OR condition not supported: $or_part"
        }
      }
      # an empty group would yield "()", which is not valid SQL
      if {[llength $or_clauses] > 0} {
        set or_clause [join $or_clauses " OR "]
        lappend sql_conditions "($or_clause)"
      }

    } elseif {[regexp {\[string match "(.*)" "\$field\(([^)]+)\)"} $part -> pattern field]} {
      # simple string match
      if {$field eq "Message"} {
        lappend sql_conditions "Message LIKE '[sqlLikeFromGlob $pattern]'"
      } else {
        puts "Warning: string match sur champ non Message: $part"
      }

    } elseif {[regexp {"\$field\(([^)]+)\)"\s*==\s*"([^"]+)"} $part -> field value]} {
      # string comparison; quotes doubled to keep the SQL literal well-formed
      set value [string map [list ' ''] $value]
      lappend sql_conditions "$field = '$value'"

    } elseif {[regexp {"\$field\(([^)]+)\)"\s*==\s*([0-9]+)} $part -> field value]} {
      # numeric comparison
      lappend sql_conditions "$field = $value"

    } else {
      puts "Warning: condition not supported: $part"
    }
  }

  set where_clause [join $sql_conditions " AND "]
  return "WHERE $where_clause"
}
1000+
1001+
1002+
8991003
set metricFd [open "|socat - UNIX-SENDTO:$cfg(TelegrafSocket)" "w"]
9001004

9011005
proc sendMetric {id value} {
@@ -915,6 +1019,7 @@ proc sendMetric {id value} {
9151019
}
9161020

9171021

1022+
9181023
set listAlarms {}
9191024
foreach {id doc description test onN onT offN offT exampleTime} $alarms {
9201025
lappend listAlarms $id
@@ -925,6 +1030,16 @@ if {$cfg(DumpRules)} {
9251030
foreach {id doc description test onN onT offN offT exampleTime} $alarms {
9261031
puts "| $id | $description | $doc | [getDetectorFromAlertId $id] | system | facility | errcode | rule1<BR>rule2 | $exampleTime | PROD |"
9271032
}
1033+
1034+
# print SQL as well
1035+
puts "\n\n"
1036+
foreach {id doc description test onN onT offN offT exampleTime} $alarms {
1037+
set sql ""
1038+
set sql [generateSQLFromCondition $test]
1039+
puts "$id : $sql"
1040+
}
1041+
1042+
9281043
exit 0
9291044
}
9301045

@@ -944,18 +1059,16 @@ while {1} {
9441059
foreach {id doc description test onN onT offN offT exampleTime} $alarms {
9451060
if ($alState($id)) {
9461061
if {$now >= [expr $alTlast($id) + $offT]} {
947-
doLogIlg ${id} "Alarm ${id} cleared after timeout."
1062+
doLogIlg ${id} "Alarm ${id} cleared after timeout. ($alCount($id) match)"
9481063
sendMetric $id 0
949-
set alState($id) 0
950-
set alSent($id) 0
1064+
clearAlarm $id
9511065
} else {
9521066
if {($alSent($id) == 0) || $repeatMetrics} {
9531067
if {$logHandle != ""} {
9541068
doLogIlg ${id} "Alarm ${id} firing."
9551069
$logContext setField "Level" "1"
9561070
$logContext setField "Severity" "E"
9571071
doLogIlg ${id} "$doc - $description"
958-
logResetFields
9591072
}
9601073
sendMetric $id 1
9611074
set alSent($id) 1

0 commit comments

Comments
 (0)