1313# - extra rules
1414# v1.3.1 01/10/2025 - adding detector field in alert message
1515# - extra rules
16+ # v1.3.2 16/10/2025 - adding run + environment field in alert message
17+ # - generate SQL query for each alert
1618
1719set cfg(TelegrafSocket) " /tmp/telegraf.sock"
1820set cfg(TelegrafBucket) " InfologgerAlerts"
@@ -64,6 +66,7 @@ proc doLogIlg {errcode msg} {
6466 return
6567 }
6668
69+ logSetFieldsForId $errcode
6770 $logContext setField " ErrorCode" " $errcode "
6871 $logContext setField " Detector" " [ getDetectorFromAlertId $errcode ] "
6972 $logHandle log $logContext " $msg "
@@ -82,6 +85,7 @@ set defaultLevel 11
8285set defaultSeverity " I"
8386proc logResetFields {} {
8487 global logHandle
88+ if {$logHandle == " " } {return }
8589 global logContext
8690 global defaultLevel
8791 global defaultSeverity
@@ -91,6 +95,22 @@ proc logResetFields {} {
9195 $logContext setField " Level" " $defaultLevel "
9296 $logContext setField " Severity" " $defaultSeverity "
9397 $logContext setField " ErrorCode" " "
98+ $logContext setField " Run" " "
99+ $logContext setField " Partition" " "
100+ $logContext setField " Detector" " "
101+ }
102+
# logSetFieldsForId --
#   Copy the Run and Partition values remembered for alarm `id` into the
#   shared log context, so that the next message logged for this alarm
#   carries them.
#
#   The values are read from the global array alFieldsFirst, which holds
#   per alarm id a 3-element list {run partition detector}. The detector
#   element is not used here.
#
# Arguments:
#   id   alarm identifier; an empty string is ignored.
#
# Results:
#   None. Does nothing when logging is disabled (empty logHandle), when
#   `id` is empty, when no fields are recorded for `id`, or when the
#   recorded list does not have exactly 3 elements.
proc logSetFieldsForId {id} {
    global logHandle logContext alFieldsFirst

    if {$logHandle eq ""} { return }
    if {$id eq ""} { return }

    # An id that was never registered/cleared has no array element; reading
    # it directly would raise "no such element in array" while logging.
    if {![info exists alFieldsFirst($id)]} { return }

    set venv $alFieldsFirst($id)
    if {[llength $venv] == 3} {
        $logContext setField "Run" "[lindex $venv 0]"
        $logContext setField "Partition" "[lindex $venv 1]"
    }
}
95115
96116if {[catch {
@@ -504,15 +524,17 @@ if {0} {
504524 set tLast($id ) $tval
505525 # puts "$id match"
506526
507- global alTlast alState alTimeout alCount
527+ global alTlast alState alTimeout alCount alFieldsFirst
508528 if (!$alState($id) ) {
509529 doLog " *** Alarm $id firing"
530+ set alFieldsFirst($id ) [list " $v_run " " $v_partition " " $v_detector " ]
510531 doLogIlg $id " Alarm $id triggered by message at $lastt_str_t on $v_hostname : $m "
511532 } else {
512533 doLog " *** Alarm $id still active"
513534 }
514535 set alTlast($id ) [clock seconds]
515536 set alState($id ) 1
537+ incr alCount($id )
516538 }
517539 }
518540 }
@@ -631,7 +653,23 @@ proc doOnline {} {
631653
632654set alarms {}
633655
# clearAlarm --
#   Put every per-alarm bookkeeping entry of alarm `id` back to its
#   initial value. The entries live in global arrays indexed by alarm id:
#     alTlast        time the condition was last detected (-1: never)
#     alState        current state of the alarm (0: not active)
#     alTimeout      timeout before resetting the alarm (30)
#     alCount        number of messages matched
#     alSent         reset to 0
#     alFieldsFirst  ILG message fields of the message that triggered the
#                    alert (empty list)
#
# Arguments:
#   id   alarm identifier
proc clearAlarm {id} {
    set initialValues {
        alTlast       -1
        alState       0
        alTimeout     30
        alCount       0
        alSent        0
        alFieldsFirst {}
    }
    foreach {arrayName initial} $initialValues {
        # Fully-qualified name: writes the element of the global array.
        set ::${arrayName}($id) $initial
    }
}
635673
636674proc registerAlarm {id description doc test example} {
637675 global alarms
@@ -650,21 +688,8 @@ proc registerAlarm {id description doc test example} {
650688
651689
652690 lappend alarms $id $doc $description $test $onN $onT $offN $offT $example
653- set tFirst($id ) -1
654- set tLast($id ) -1
655- set tN($id ) 0
656-
657- # time of last time the condition was detected
658- # current state of alarm
659- # timeout before resetting alarm
660- # count number of messages matched
661-
662- global alTlast alState alTimeout alCount alSent
663- set alTlast($id ) -1
664- set alState($id ) 0
665- set alTimeout($id ) 30
666- set alCount($id ) 0
667- set alSent($id ) 0
691+
692+ clearAlarm $id
668693}
669694
670695
@@ -896,6 +921,85 @@ registerAlarm \
896921# ###################################################################
897922
898923
# _sqlSplitOnOperator --
#   Split `text` on every occurrence of the multi-character `operator`.
#   Plain [split] cannot be used: its second argument is a set of single
#   characters, so "&&" would also cut on a lone "&" inside a pattern.
proc _sqlSplitOnOperator {text operator} {
    set sep "\u0001"
    return [split [string map [list $operator $sep] $text] $sep]
}

# _sqlStripOuterParens --
#   Remove one leading "(" and one trailing ")" when both are present,
#   then trim surrounding whitespace. Otherwise return `text` unchanged.
proc _sqlStripOuterParens {text} {
    if {[string length $text] >= 2 &&
        [string index $text 0] eq "(" &&
        [string index $text end] eq ")"} {
        return [string trim [string range $text 1 end-1]]
    }
    return $text
}

# _sqlLikeFromGlob --
#   Translate a [string match] glob into the body of a SQL LIKE literal:
#   "*" -> "%", "?" -> "_", and "'" doubled so it cannot end the literal.
#   Glob character classes ([...]) and backslash escapes are not translated.
proc _sqlLikeFromGlob {pattern} {
    return [string map {* % ? _ ' ''} $pattern]
}

# generateSQLFromCondition --
#   Convert the Tcl logical expression defining an alarm into the
#   corresponding SQL WHERE clause.
#
#   Supported sub-expressions, combined with "&&":
#     [string match "<glob>" "$field(Message)"]   -> Message LIKE '<like>'
#     "$field(<Name>)" == "<text>"                -> <Name> = '<text>'
#     "$field(<Name>)" == <digits>                -> <Name> = <digits>
#     a "||" group of string matches on Message   -> (… OR …)
#   Anything else is reported on stdout with a warning and left out.
#
# Arguments:
#   condition_expr   the Tcl expression text (not evaluated)
#
# Results:
#   "WHERE <cond> AND <cond> ...", or an empty string when no part of the
#   expression could be converted.
proc generateSQLFromCondition {condition_expr} {
    set sql_conditions {}

    foreach part [_sqlSplitOnOperator $condition_expr "&&"] {
        set part [string trim $part]
        if {$part eq ""} { continue }
        set part [_sqlStripOuterParens $part]

        if {[string first "||" $part] != -1} {
            # OR group: only string matches on Message are supported
            set or_clauses {}
            foreach or_part [_sqlSplitOnOperator $part "||"] {
                set or_part [string trim $or_part]
                if {$or_part eq ""} { continue }
                set or_part [_sqlStripOuterParens $or_part]

                if {[regexp {\[string match "(.*)" "\$field\(Message\)"} $or_part -> pattern]} {
                    lappend or_clauses "Message LIKE '[_sqlLikeFromGlob $pattern]'"
                } else {
                    puts "Warning: OR condition not supported: $or_part"
                }
            }
            # An empty group would produce "()", which is not valid SQL.
            if {[llength $or_clauses] > 0} {
                lappend sql_conditions "([join $or_clauses " OR "])"
            }
            continue
        }

        if {[regexp {\[string match "(.*)" "\$field\(([^)]+)\)"} $part -> pattern field]} {
            # simple string match
            if {$field eq "Message"} {
                lappend sql_conditions "Message LIKE '[_sqlLikeFromGlob $pattern]'"
            } else {
                puts "Warning: string match sur champ non Message: $part"
            }
        } elseif {[regexp {"\$field\(([^)]+)\)"\s*==\s*"([^"]+)"} $part -> field value]} {
            # comparison with a string: double any quote inside the literal
            lappend sql_conditions "$field = '[string map {' ''} $value]'"
        } elseif {[regexp {"\$field\(([^)]+)\)"\s*==\s*([0-9]+)} $part -> field value]} {
            # comparison with a number
            lappend sql_conditions "$field = $value"
        } else {
            puts "Warning: condition not supported: $part"
        }
    }

    # Avoid returning a dangling "WHERE " when nothing was convertible.
    if {[llength $sql_conditions] == 0} {
        return ""
    }
    return "WHERE [join $sql_conditions " AND "]"
}
1000+
1001+
1002+
8991003set metricFd [open " |socat - UNIX-SENDTO:$cfg(TelegrafSocket) " " w" ]
9001004
9011005proc sendMetric {id value} {
@@ -915,6 +1019,7 @@ proc sendMetric {id value} {
9151019}
9161020
9171021
1022+
9181023set listAlarms {}
9191024foreach {id doc description test onN onT offN offT exampleTime} $alarms {
9201025 lappend listAlarms $id
@@ -925,6 +1030,16 @@ if {$cfg(DumpRules)} {
9251030 foreach {id doc description test onN onT offN offT exampleTime} $alarms {
9261031 puts " | $id | $description | $doc | [ getDetectorFromAlertId $id ] | system | facility | errcode | rule1<BR>rule2 | $exampleTime | PROD |"
9271032 }
1033+
1034+ # print SQL as well
1035+ puts " \n\n "
1036+ foreach {id doc description test onN onT offN offT exampleTime} $alarms {
1037+ set sql " "
1038+ set sql [generateSQLFromCondition $test ]
1039+ puts " $id : $sql "
1040+ }
1041+
1042+
9281043 exit 0
9291044}
9301045
@@ -944,18 +1059,16 @@ while {1} {
9441059 foreach {id doc description test onN onT offN offT exampleTime} $alarms {
9451060 if ($alState($id) ) {
9461061 if {$now >= [expr $alTlast($id) + $offT ]} {
947- doLogIlg ${id} " Alarm ${id} cleared after timeout."
1062+ doLogIlg ${id} " Alarm ${id} cleared after timeout. ( $alCount($id) match) "
9481063 sendMetric $id 0
949- set alState($id ) 0
950- set alSent($id ) 0
1064+ clearAlarm $id
9511065 } else {
9521066 if {($alSent($id) == 0) || $repeatMetrics } {
9531067 if {$logHandle != " " } {
9541068 doLogIlg ${id} " Alarm ${id} firing."
9551069 $logContext setField " Level" " 1"
9561070 $logContext setField " Severity" " E"
9571071 doLogIlg ${id} " $doc - $description "
958- logResetFields
9591072 }
9601073 sendMetric $id 1
9611074 set alSent($id ) 1
0 commit comments