Skip to content

Commit ed82a8a

Browse files
committed
Adding 1 new alert
1 parent 2a745fa commit ed82a8a

File tree

3 files changed

+35
-2
lines changed

3 files changed

+35
-2
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,9 @@ Feel free to open an issue on github or use the contact author on the SplunkBase
360360

361361
## Release Notes
362362
### 4.0.5
363+
New alerts:
364+
- `AllSplunkEnterpriseLevel - Splunk servers with resource starvation v2`
365+
363366
New reports:
364367
- `SearchHeadLevel - indexes per dashboard`
365368

default/data/ui/nav/default.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
</collection>
2222
<collection label="Performance">
2323
<a href="/app/SplunkAdmins/alert?s=%2FservicesNS%2Fnobody%2FSplunkAdmins%2Fsaved%2Fsearches%2FAllSplunkEnterpriseLevel%20-%20Splunk%20Servers%20with%20resource%20starvation">Splunk Servers with resource starvation</a>
24+
<a href="/app/SplunkAdmins/alert?s=%2FservicesNS%2Fnobody%2FSplunkAdmins%2Fsaved%2Fsearches%2FAllSplunkEnterpriseLevel%20-%20Splunk%20servers%20with%20resource%20starvation%20v2">Splunk servers with resource starvation v2</a>
25+
2426
<a href="/app/SplunkAdmins/alert?s=%2FservicesNS%2Fnobody%2FSplunkAdmins%2Fsaved%2Fsearches%2FAllSplunkLevel%20-%20Time%20skew%20on%20Splunk%20Servers">Time skew on Splunk Servers</a>
2527
</collection>
2628
</collection>

default/savedsearches.conf

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3297,7 +3297,7 @@ alert.suppress = 0
32973297
alert.track = 1
32983298
counttype = number of events
32993299
cron_schedule = 13 */2 * * *
3300-
description = Chance the alert requires action? Moderate. Detect when a Splunk enterprise host is reporting that it is seeing excessive response times while running operations
3300+
description = Chance the alert requires action? Moderate. Detect when a Splunk enterprise host is reporting that it is seeing excessive response times while running operations. A simpler alternative to this alert is "AllSplunkEnterpriseLevel - Splunk servers with resource starvation v2"
33013301
dispatch.earliest_time = -120m@m
33023302
dispatch.latest_time = now
33033303
display.events.fields = ["host","source","sourcetype"]
@@ -3311,7 +3311,7 @@ relation = greater than
33113311
request.ui_dispatch_app = SplunkAdmins
33123312
request.ui_dispatch_view = search
33133313
search = ```Attempt to find entries in the splunkd logs that indicate that Splunk is resource constrained and requires more CPU or similar```\
3314-
index=_internal `indexerhosts` sourcetype=splunkd `splunkadmins_splunkd_source` "Might indicate hardware or splunk limitations" OR "took longer than" ```This is useful for reporting but not so useful for alerting... OR "WARN PeriodicReapingTimeout"``` NOT "Might indicate slow ldap server." ```Add in OR (WARN ConfMetrics) ?)``` \
3314+
index=_internal `splunkenterprisehosts` sourcetype=splunkd `splunkadmins_splunkd_source` "Might indicate hardware or splunk limitations" OR "took longer than" ```This is useful for reporting but not so useful for alerting... OR "WARN PeriodicReapingTimeout"``` NOT "Might indicate slow ldap server." ```Add in OR (WARN ConfMetrics) ?)``` \
33153315
| rex "^[\d-]+ [\d:\.]+( )+[\+-]?\d+( )+[^ ]+( )+(?P<componentAndArea>([^ ]+( )+){3}).*\((?P<number>\d+) milliseconds" \
33163316
| rex "^[\d-]+ [\d:\.]+( )+[\+-]?\d+( )+[^ ]+( )+(?P<componentAndArea2>DispatchManager\s+([^ ]+( )+){3}).*elapsed_ms=(?P<number3>\d+)" \
33173317
| rex "Spent (?P<number2>\d+)"\
@@ -8889,3 +8889,31 @@ search = | rest `splunkadmins_restmacro` /servicesNS/-/-/data/ui/views f=eai:dat
88898889
| eval indexes=mvmap(indexes, trim(replace(indexes, "'", ""))) \
88908890
| eval indexes=mvdedup(indexes) \
88918891
| stats values(indexes) AS indexes by title
8892+
8893+
[AllSplunkEnterpriseLevel - Splunk servers with resource starvation v2]
8894+
alert.suppress = 0
8895+
alert.track = 1
8896+
alert.digest_mode = 1
8897+
alert.severity = 2
8898+
counttype = number of events
8899+
cron_schedule = 51 * * * *
8900+
description = Chance the alert requires action? Moderate. The goal is to find splunk enterprise instances that are showing signs of a performance issue so action can be taken. A more thorough version of this alert exists as "AllSplunkEnterpriseLevel - Splunk Servers with resource starvation"
8901+
dispatch.earliest_time = -1h@h
8902+
dispatch.latest_time = now
8903+
display.general.type = statistics
8904+
display.page.search.tab = statistics
8905+
display.visualizations.charting.chart = area
8906+
enableSched = 1
8907+
quantity = 0
8908+
relation = greater than
8909+
request.ui_dispatch_app = SplunkAdmins
8910+
request.ui_dispatch_view = search
8911+
search = index=_internal sourcetype=splunkd `splunkadmins_splunkd_source` `splunkenterprisehosts` "Either time adjusted forwards by" OR "event loop was descheduled" ``` this is an early indicator but a little bit noisy OR "CallbackRunnerThread is unusually busy"``` OR "took longer than seems reasonable"\
8912+
| rex "\((?P<milliseconds>\d+) milliseconds"\
8913+
| where isnull(milliseconds) OR milliseconds>50000\
8914+
| eventstats count by host\
8915+
| where count>2\
8916+
| table _time, _raw, host, milliseconds\
8917+
| eval _time=strftime(_time, "%+")
8918+
disabled = 1
8919+

0 commit comments

Comments
 (0)