Adding 1 new alert

gjanders · gjanders · commit ed82a8a5f592 · 2025-08-29T05:03:05.000Z
diff --git a/README.md b/README.md
@@ -360,6 +360,9 @@ Feel free to open an issue on github or use the contact author on the SplunkBase
 
 ## Release Notes
 ### 4.0.5
+New alerts:
+- `AllSplunkEnterpriseLevel - Splunk servers with resource starvation v2`
+
 New reports:
 - `SearchHeadLevel - indexes per dashboard`
 
diff --git a/default/data/ui/nav/default.xml b/default/data/ui/nav/default.xml
@@ -21,6 +21,8 @@
 			</collection>   
 			<collection label="Performance">		
 				<a href="/app/SplunkAdmins/alert?s=%2FservicesNS%2Fnobody%2FSplunkAdmins%2Fsaved%2Fsearches%2FAllSplunkEnterpriseLevel%20-%20Splunk%20Servers%20with%20resource%20starvation">Splunk Servers with resource starvation</a>
+				<a href="/app/SplunkAdmins/alert?s=%2FservicesNS%2Fnobody%2FSplunkAdmins%2Fsaved%2Fsearches%2FAllSplunkEnterpriseLevel%20-%20Splunk%20servers%20with%20resource%20starvation%20v2">Splunk servers with resource starvation v2</a>
+
 				<a href="/app/SplunkAdmins/alert?s=%2FservicesNS%2Fnobody%2FSplunkAdmins%2Fsaved%2Fsearches%2FAllSplunkLevel%20-%20Time%20skew%20on%20Splunk%20Servers">Time skew on Splunk Servers</a>				
 			</collection>   
 		</collection>   
diff --git a/default/savedsearches.conf b/default/savedsearches.conf
@@ -3297,7 +3297,7 @@ alert.suppress = 0
 alert.track = 1
 counttype = number of events
 cron_schedule = 13 */2 * * *
-description = Chance the alert requires action? Moderate. Detect when a Splunk enterprise host is reporting that it is seeing excessive response times while running operations
+description = Chance the alert requires action? Moderate. Detect when a Splunk enterprise host is reporting that it is seeing excessive response times while running operations. A simpler alternative to this alert is "AllSplunkEnterpriseLevel - Splunk servers with resource starvation v2"
 dispatch.earliest_time = -120m@m
 dispatch.latest_time = now
 display.events.fields = ["host","source","sourcetype"]
@@ -3311,7 +3311,7 @@ relation = greater than
 request.ui_dispatch_app = SplunkAdmins
 request.ui_dispatch_view = search
 search = ```Attempt to find entries in the splunkd logs that indiciate that Splunk is resource constrained and requires more CPU or similar```\
-index=_internal `indexerhosts`  sourcetype=splunkd `splunkadmins_splunkd_source` "Might indicate hardware or splunk limitations" OR "took longer than" ```This is useful for reporting but not so useful for alerting... OR "WARN  PeriodicReapingTimeout"``` NOT "Might indicate slow ldap server." ```Add in OR (WARN ConfMetrics) ?)``` \
+index=_internal `splunkenterprisehosts`  sourcetype=splunkd `splunkadmins_splunkd_source` "Might indicate hardware or splunk limitations" OR "took longer than" ```This is useful for reporting but not so useful for alerting... OR "WARN  PeriodicReapingTimeout"``` NOT "Might indicate slow ldap server." ```Add in OR (WARN ConfMetrics) ?)``` \
 | rex "^[\d-]+ [\d:\.]+( )+[\+-]?\d+( )+[^ ]+( )+(?P<componentAndArea>([^ ]+( )+){3}).*\((?P<number>\d+) milliseconds" \
 | rex "^[\d-]+ [\d:\.]+( )+[\+-]?\d+( )+[^ ]+( )+(?P<componentAndArea2>DispatchManager\s+([^ ]+( )+){3}).*elapsed_ms=(?P<number3>\d+)" \
 | rex "Spent (?P<number2>\d+)"\
@@ -8889,3 +8889,31 @@ search = | rest `splunkadmins_restmacro` /servicesNS/-/-/data/ui/views f=eai:dat
 | eval indexes=mvmap(indexes, trim(replace(indexes, "'", ""))) \
 | eval indexes=mvdedup(indexes) \
 | stats values(indexes) AS indexes by title
+
+[AllSplunkEnterpriseLevel - Splunk servers with resource starvation v2]
+alert.suppress = 0
+alert.track = 1
+alert.digest_mode = 1
+alert.severity = 2
+counttype = number of events
+cron_schedule = 51 * * * *
+description = Chance the alert requires action? Moderate. The goal is to find splunk enterprise instances that are showing signs of a performance issue so action can be taken. A more thorough version of this alert exists as "AllSplunkEnterpriseLevel - Splunk Servers with resource starvation"
+dispatch.earliest_time = -1h@h
+dispatch.latest_time = now
+display.general.type = statistics
+display.page.search.tab = statistics
+display.visualizations.charting.chart = area
+enableSched = 1
+quantity = 0
+relation = greater than
+request.ui_dispatch_app = SplunkAdmins
+request.ui_dispatch_view = search
+search = index=_internal sourcetype=splunkd `splunkadmins_splunkd_source` `splunkenterprisehosts` "Either time adjusted forwards by" OR "event loop was descheduled" ``` this is an early indicator but a little bit noisy OR "CallbackRunnerThread is unusually busy"``` OR "took longer than seems reasonable"\
+| rex "\((?P<milliseconds>\d+) milliseconds"\
+| where isnull(milliseconds) OR milliseconds>50000\
+| eventstats count by host\
+| where count>2\
+| table _time, _raw, host, milliseconds\
+| eval _time=strftime(_time, "%+")
+disabled = 1
+