|
| 1 | +# Alert Plugin Guide |
| 2 | + [Korean](Alert-Plugin-Guide_kr.md) |
| 3 | + |
| 4 | +You can build your own alarm rules by writing alert scripting plugins, which can combine various performance metrics. |
| 5 | + |
| 6 | +## How to |
| 7 | + 1. create 2 files in the server plugin directory (**[server_running_dir]/plugin** by default) |
| 8 | + * [PERFORMANCE_COUNTER_NAME].alert : script for alarm |
| 9 | + * [PERFORMANCE_COUNTER_NAME].conf : configuration for this alarm |
| 10 | + |
| 11 | + 2. [PERFORMANCE_COUNTER_NAME].conf |
| 12 | + * ```properties |
| 13 | + history_size=150 |
| 14 | + silent_time=300 |
| 15 | + check_time=30 |
| 16 | + ``` |
| 17 | + * ```history_size``` : data count that will be kept |
| 18 | + * if it is set to 150, the oldest kept data is about 300 seconds old, because Scouter counter data is sent every 2 seconds. |
| 19 | + * ```silent_time``` : alarm silence period - if the same alarm has already occurred within the past [x] seconds, it is ignored. |
| 20 | + * ```check_time``` : interval at which the script ([PERFORMANCE_COUNTER_NAME].alert) is invoked. |
| 21 | + |
| 22 | + 3. [PERFORMANCE_COUNTER_NAME].alert |
| 23 | + * script file for alert rule (java) |
| 24 | + |
| 25 | + * sample1 (**GcTime.alert**) |
| 26 | + * alert when ```GcTime``` exceeds 2 seconds |
| 27 | + ```java |
| 28 | + // void process(RealCounter $counter) |
| 29 | + int gcTime = $counter.intValue(); |
| 30 | + if(gcTime > 2000) { |
| 31 | + $counter.fatal("gc time fatal", "gc time:" + gcTime + "ms"); |
| 32 | + } |
| 33 | + ``` |
| 34 | + |
| 35 | + * sample2 (**Elasped90%.alert**) |
| 36 | + * alert when ```Elasped90%``` exceeds 1.5 seconds (ignored when TPS is lower than 3) |
| 37 | + ```java |
| 38 | + // void process(RealCounter $counter) |
| 39 | + int warn = 1500; |
| 40 | + int fatal = 2000; |
| 41 | + |
| 42 | + int tps = $counter.intValue("TPS"); |
| 43 | + int respTime = $counter.intValue(); |
| 44 | + |
| 45 | + String objType = $counter.objType(); |
| 46 | + String objName = $counter.objName(); |
| 47 | + |
| 48 | + java.text.NumberFormat f = java.text.NumberFormat.getInstance(); |
| 49 | + f.setMaximumFractionDigits(2); |
| 50 | + |
| 51 | + if(tps < 3) return; |
| 52 | + if(respTime > fatal) { |
| 53 | + $counter.fatal("resp time fatal high", "90% resp time:" + f.format((long)respTime) + "ms, tps:" + tps); |
| 54 | + } else if(respTime > warn) { |
| 55 | + $counter.warn("resp time warn high", "90% resp time:" + f.format((long)respTime) + "ms, tps:" + tps); |
| 56 | + } |
| 57 | + ``` |
| 58 | + |
| 59 | + * sample3 (**TPS.alert**) |
| 60 | + * alert when ```TPS``` increases or decreases sharply. |
| 61 | + ```java |
| 62 | + // void process(RealCounter $counter) |
| 63 | + |
| 64 | + //increase |
| 65 | + float initTps = 20.0; |
| 66 | + float warnIncreaseRate = 1.2; |
| 67 | + float fatalIncreaseRate = 1.5; |
| 68 | + int compareBeforeSec = 180; |
| 69 | + |
| 70 | + //decrease |
| 71 | + float fatal1DecreaseRate = 0.7; //30% |
| 72 | + float fatal2DecreaseRate = 0.5; //50% |
| 73 | + int compareBeforeDecreaseSec = 120; //last 2 minute |
| 74 | + |
| 75 | + float tps = $counter.floatValue(); |
| 76 | + String objType = $counter.objType(); |
| 77 | + String objName = $counter.objName(); |
| 78 | + float errorRate = $counter.floatValue(); |
| 79 | + java.text.NumberFormat f = java.text.NumberFormat.getInstance(); |
| 80 | + f.setMaximumFractionDigits(1); |
| 81 | + |
| 82 | + if(tps > initTps) { |
| 83 | + float preValue = $counter.getAvg(compareBeforeSec, 4); |
| 84 | + if(preValue > 0.0f) { |
| 85 | + if(tps > preValue * fatalIncreaseRate) { |
| 86 | + $counter.fatal("TPS increase fatal" |
| 87 | + , "TPS is " + f.format((double)tps/preValue*100) + "% higher than " + compareBeforeSec + "sec ago" |
| 88 | + + ", TPS: " + tps); |
| 89 | + |
| 90 | + } else if(tps > preValue * warnIncreaseRate) { |
| 91 | + $counter.warn("TPS increase warning" |
| 92 | + , "TPS is " + f.format((double)tps/preValue*100) + "% higher than " + compareBeforeSec + "sec ago" |
| 93 | + + ", TPS: " + tps); |
| 94 | + } |
| 95 | + } |
| 96 | + } |
| 97 | + |
| 98 | + float preValue = $counter.getAvg(compareBeforeDecreaseSec, 4); |
| 99 | + if(preValue > 5.0f) { |
| 100 | + if(tps < preValue * fatal2DecreaseRate) { |
| 101 | + $counter.fatal("TPS decrease fatal" |
| 102 | + , "TPS is " + f.format(((double)1-tps/preValue)*100) + "% lower than " + compareBeforeDecreaseSec + "sec ago" |
| 103 | + + ", TPS: " + tps); |
| 104 | + } else if(tps < preValue * fatal1DecreaseRate) { |
| 105 | + $counter.error("TPS decrease warn" |
| 106 | + , "TPS is " + f.format((double)(1-tps/preValue)*100) + "% lower than " + compareBeforeDecreaseSec + "sec ago" |
| 107 | + + ", TPS: " + tps); |
| 108 | + } |
| 109 | + } |
| 110 | + ``` |
| 111 | + |
| 112 | +### RealCounter API |
| 113 | +| method | desc | |
| 114 | +| ------------ | ---------- | |
| 115 | +| objName() | get object's name that produced the counter value | |
| 116 | +| objType() | get object's type that produced the counter value | |
| 117 | +| intValue() | get counter value as integer | |
| 118 | +| floatValue() | get counter value as float | |
| 119 | +| historySize() | get history size set by the conf file | |
| 120 | +| overCount(int value, int sec) | get how many times the counter exceeded the given value within the last sec seconds | |
| 121 | +| overCount(float value, int sec) | get how many times the counter exceeded the given value within the last sec seconds | |
| 122 | +| getAvg(int fromAgoSec, int durationSec) | calculate the average of the counter values for durationSec seconds, starting from fromAgoSec seconds ago | |
| 123 | +| getLatestAvg(int durationSec) | calculate the average of the counter values from durationSec seconds ago until now | |
| 124 | +| info(String title, String message) | invoke alarm as info level | |
| 125 | +| warn(String title, String message) | invoke alarm as warn level | |
| 126 | +| error(String title, String message) | invoke alarm as error level | |
| 127 | +| fatal(String title, String message) | invoke alarm as fatal level | |
| 128 | +| floatValue(String anotherCounterName) | get another counter's current value by the name | |
| 129 | +| intValue(String anotherCounterName) | get another counter's current value by the name | |
| 130 | + |
| 131 | +### Counter names |
| 132 | + * [counters.xml](https://github.com/scouter-project/scouter/blob/fe74bdb73a34be2f390f8476991d59a5de6ea204/scouter.common/src/main/resources/scouter/lang/counters/counters.xml) |
| 133 | + |
0 commit comments