Skip to content

Commit 12ff2e5

Browse files
authored
Merge pull request #13 from NETWAYS/feature/add-exclude
Add option to exclude alarms and usage elements
2 parents ad21861 + fae7be6 commit 12ff2e5

File tree

3 files changed

+123
-14
lines changed

3 files changed

+123
-14
lines changed

README.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Please prefer installation via system packages like `python3-requests`.
1616

1717
Alternatively you can install with pip:
1818

19-
pip3 install requests
19+
pip3 install -r requirements.txt
2020

2121
Make sure to adjust the shebang to match your environment; one of the following should work.
2222

@@ -43,12 +43,23 @@ optional arguments:
4343
--password PASSWORD, -p PASSWORD
4444
Password for Basic Auth
4545
--mode MODE, -m MODE Check mode
46+
--exclude [EXCLUDE ...]
47+
Exclude alarms or usage from the check results. Can be used multiple times and supports regular expressions.
4648
--max-age MAX_AGE, -M MAX_AGE
4749
Max age in minutes for capacity usage updates. Defaults to 5
4850
--version, -V Print version
4951
--insecure Do not verify TLS certificate. Be careful with this option, please
5052
```
5153

54+
The `--exclude` parameter is matched against alarms and capacity-usage entries. Each entry is matched using the following string representation, with the fields separated by single spaces:
55+
56+
* alarms: `severity` `node_display_name` `feature_display_name` `event_type_display_name`
57+
* capacity-usage: `severity` `display_name`
58+
59+
## Examples
60+
61+
Mode: cluster-status
62+
5263
```
5364
$ ./check_vmware_nsxt.py --api 'https://vmware-nsx.local' -u icinga -p password --mode cluster-status
5465
[OK] control_cluster_status=STABLE - mgmt_cluster_status=STABLE - control_cluster_status=STABLE - nodes_online=3
@@ -66,14 +77,25 @@ $ ./check_vmware_nsxt.py --api 'https://vmware-nsx.local' -u icinga -p password
6677
| nodes_online=3;;;0
6778
```
6879

80+
Mode: alarms
81+
6982
```
7083
$ ./check_vmware_nsxt.py --api 'https://vmware-nsx.local' -u icinga -p password --mode alarms
7184
[WARNING] 1 alarms - 1 medium
7285
7386
[MEDIUM] (2021-04-26 17:25:18) (node1) Intelligence Health/Storage Latency High - Intelligence node storage latency is high.
87+
| alarms=1;;;0 alarms.medium=1;;;0
88+
```
89+
90+
```
91+
$ ./check_vmware_nsxt.py --api 'https://vmware-nsx.local' -u icinga -p password --mode alarms --exclude "LOW"
92+
# Excluded alarms are still included in the total count, but are not factored into the exit code
93+
[OK] 1 alarms
7494
| alarms=1;;;0
7595
```
7696

97+
Mode: capacity-usage
98+
7799
```
78100
$ ./check_vmware_nsxt.py --api 'https://vmware-nsx.local' -u icinga -p password --mode capacity-usage
79101
[OK] 28 info - no usages - last update: 2021-04-29 19:06:12

check_vmware_nsxt.py

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import logging
3838
import datetime
3939
import ssl
40+
import re
4041
from urllib.parse import urljoin
4142
import urllib3
4243
import requests
@@ -127,26 +128,26 @@ def request(self, url, method='GET'):
127128
except Exception as json_exc:
128129
raise CriticalException('Could not decode API JSON: ' + str(json_exc)) # pylint: disable=raise-missing-from
129130

130-
def get_cluster_status(self):
131+
def get_cluster_status(self, excludes=None):
131132
"""
132133
GET and build ClusterStatus
133134
"""
134-
return ClusterStatus(self.request('cluster/status'))
135+
return ClusterStatus(self.request('cluster/status'), excludes)
135136

136-
def get_alarms(self):
137+
def get_alarms(self, excludes=None):
137138
"""
138139
GET and build Alarms
139140
"""
140141
status = "OPEN"
141142
# status = "RESOLVED" # for testing
142143
result = self.request('alarms?page_size=100&status=%s&sort_ascending=false' % status)
143-
return Alarms(result['results'])
144+
return Alarms(data=result['results'], excludes=excludes)
144145

145-
def get_capacity_usage(self):
146+
def get_capacity_usage(self, excludes=None):
146147
"""
147148
GET and build CapacityUsage
148149
"""
149-
return CapacityUsage(self.request('capacity/usage'), self.max_age)
150+
return CapacityUsage(self.request('capacity/usage'), self.max_age, excludes)
150151

151152

152153
class CheckResult:
@@ -203,9 +204,12 @@ class ClusterStatus(CheckResult):
203204
https://vdc-download.vmware.com/vmwb-repository/dcr-public/787988e9-6348-4b2a-8617-e6d672c690ee/a187360c-77d5-4c0c-92a8-8e07aa161a27/api_includes/method_ReadClusterStatus.html
204205
"""
205206

206-
def __init__(self, data):
207+
def __init__(self, data, excludes):
207208
super().__init__()
208209
self.data = data
210+
self.excludes = excludes
211+
if excludes is None:
212+
self.excludes = []
209213

210214
def build_output(self):
211215
for area in ['control_cluster_status', 'mgmt_cluster_status', 'control_cluster_status']:
@@ -234,14 +238,33 @@ class Alarms(CheckResult):
234238
https://vdc-download.vmware.com/vmwb-repository/dcr-public/787988e9-6348-4b2a-8617-e6d672c690ee/a187360c-77d5-4c0c-92a8-8e07aa161a27/api_includes/method_GetAlarms.html
235239
"""
236240

237-
def __init__(self, data):
241+
def __init__(self, data, excludes):
238242
super().__init__()
239243
self.data = data
244+
self.excludes = excludes
245+
if excludes is None:
246+
self.excludes = []
247+
248+
def _is_excluded(self, alarm):
249+
# to exclude via --exclude
250+
identifier = "%s %s %s %s" % (
251+
alarm['severity'],
252+
alarm['node_display_name'],
253+
alarm['feature_display_name'],
254+
alarm['event_type_display_name'])
255+
for exclude in self.excludes:
256+
regexp = re.compile(exclude)
257+
if bool(regexp.search(identifier)):
258+
return True
259+
return False
240260

241261
def build_output(self):
242262
states = {}
243263

244264
for alarm in self.data:
265+
if self._is_excluded(alarm):
266+
continue
267+
245268
severity = alarm['severity']
246269
if severity in states:
247270
states[severity] += 1
@@ -270,7 +293,11 @@ def build_status(self):
270293
states = []
271294

272295
for alarm in self.data:
273-
state = WARNING if alarm['severity'] in ['MEDIUM', 'LOW'] else CRITICAL # CRITICAL, HIGH
296+
if self._is_excluded(alarm):
297+
continue
298+
299+
# HIGH == CRITICAL
300+
state = WARNING if alarm['severity'] in ['MEDIUM', 'LOW'] else CRITICAL
274301
states.append(state)
275302

276303
if len(states) > 0:
@@ -285,15 +312,33 @@ class CapacityUsage(CheckResult):
285312
https://vdc-download.vmware.com/vmwb-repository/dcr-public/787988e9-6348-4b2a-8617-e6d672c690ee/a187360c-77d5-4c0c-92a8-8e07aa161a27/api_includes/method_GetProtonCapacityUsage.html
286313
"""
287314

288-
def __init__(self, data, max_age):
315+
def __init__(self, data, max_age, excludes):
289316
super().__init__()
290317
self.data = data
291318
self.max_age = max_age
319+
self.excludes = excludes
320+
if excludes is None:
321+
self.excludes = []
322+
323+
def _is_excluded(self, usage):
324+
# to exclude via --exclude
325+
identifier = "%s %s" % (
326+
usage['severity'],
327+
usage['display_name'])
328+
329+
for exclude in self.excludes:
330+
regexp = re.compile(exclude)
331+
if bool(regexp.search(identifier)):
332+
return True
333+
return False
292334

293335
def build_output(self):
294336
states = {}
295337

296338
for usage in self.data['capacity_usage']:
339+
if self._is_excluded(usage):
340+
continue
341+
297342
severity = usage['severity'] # INFO, WARNING, CRITICAL, ERROR
298343

299344
if severity in states:
@@ -341,6 +386,9 @@ def build_status(self):
341386
self.summary.append("last update older than %s minutes" % (self.max_age))
342387

343388
for usage in self.data['capacity_usage']:
389+
if self._is_excluded(usage):
390+
continue
391+
344392
severity = usage['severity'] # INFO, WARNING, CRITICAL, ERROR
345393

346394
if severity == "INFO":
@@ -398,6 +446,8 @@ def commandline(args):
398446
help='Password for Basic Auth', required=True)
399447
parser.add_argument('--mode', '-m', choices=['cluster-status', 'alarms', 'capacity-usage'],
400448
help='Check mode to exectue. Hint: alarms will only include open alarms.', required=True)
449+
parser.add_argument('--exclude', nargs='*', action='extend', type=str,
450+
help="Exclude alarms or usage from the check results. Can be used multiple times and supports regular expressions.")
401451
parser.add_argument('--max-age', '-M', type=int,
402452
help='Max age in minutes for capacity usage updates. Defaults to 5', default=5, required=False)
403453
parser.add_argument('--insecure',
@@ -421,11 +471,11 @@ def main(args):
421471
client = Client(args.api, args.username, args.password, verify=(not args.insecure), max_age=args.max_age)
422472

423473
if args.mode == 'cluster-status':
424-
return client.get_cluster_status().print_and_return()
474+
return client.get_cluster_status(args.exclude).print_and_return()
425475
if args.mode == 'alarms':
426-
return client.get_alarms().print_and_return()
476+
return client.get_alarms(args.exclude).print_and_return()
427477
if args.mode == 'capacity-usage':
428-
return client.get_capacity_usage().print_and_return()
478+
return client.get_capacity_usage(args.exclude).print_and_return()
429479

430480
print("[UNKNOWN] unknown mode %s" % args.mode)
431481
return UNKNOWN

test_check_vmware_nsxt.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,26 @@ def test_alarms_ok(self, mock_req, mock_print):
153153
self.assertEqual(actual, expected)
154154
mock_print.assert_called_with('[WARNING] 1 alarms - 1 medium\n\n[MEDIUM] (2021-04-26 15:25:18) (node1) Intelligence Health/Storage Latency High - Intelligence node storage latency is high.\n| alarms=1;;;0 alarms.medium=1;;;0')
155155

156+
@mock.patch('builtins.print')
157+
@mock.patch('requests.request')
158+
def test_alarms_exclude(self, mock_req, mock_print):
159+
160+
with open('testdata/fixtures/alarms.json') as f:
161+
testdata = json.load(f)
162+
163+
m = mock.MagicMock()
164+
m.status_code = 200
165+
m.json.return_value = testdata
166+
mock_req.return_value = m
167+
168+
c = Client('api', 'username', 'password', logger=None, verify=True, max_age=5)
169+
170+
actual = c.get_alarms(excludes=["M[A-Z]+M"]).print_and_return()
171+
expected = 0
172+
173+
self.assertEqual(actual, expected)
174+
mock_print.assert_called_with('[OK] 1 alarms\n| alarms=1;;;0')
175+
156176
@mock.patch('builtins.print')
157177
@mock.patch('requests.request')
158178
def test_capacity_usage_ok(self, mock_req, mock_print):
@@ -172,3 +192,20 @@ def test_capacity_usage_ok(self, mock_req, mock_print):
172192

173193
self.assertEqual(actual, expected)
174194
mock_print.assert_called_with('[WARNING] 28 info - last update: 2021-04-30 09:17:40 - last update older than 5 minutes\n\n[OK] [INFO] System-wide NAT rules: 0 of 25000 (0%)\n[OK] [INFO] Network Introspection Rules: 1 of 10000 (0.01%)\n[OK] [INFO] System-wide Endpoint Protection Enabled Hosts: 0 of 256 (0%)\n[OK] [INFO] Hypervisor Hosts: 18 of 1024 (1.75%)\n[OK] [INFO] System-wide Firewall Rules: 81 of 100000 (0.08%)\n[OK] [INFO] System-wide DHCP Pools: 0 of 10000 (0%)\n[OK] [INFO] System-wide Edge Nodes: 10 of 320 (3.12%)\n[OK] [INFO] Active Directory Domains (Identity Firewall): 0 of 4 (0%)\n[OK] [INFO] vSphere Clusters Prepared for NSX: 4 of 128 (3.12%)\n[OK] [INFO] Prefix-lists: 20 of 500 (4%)\n[OK] [INFO] Logical Switches: 12 of 10000 (0.12%)\n[OK] [INFO] System-wide Logical Switch Ports: 145 of 25000 (0.58%)\n[OK] [INFO] Active Directory Groups (Identity Firewall): 0 of 100000 (0%)\n[OK] [INFO] Distributed Firewall Rules: 75 of 100000 (0.07%)\n[OK] [INFO] System-wide Endpoint Protection Enabled Virtual Machines: 0 of 7500 (0%)\n[OK] [INFO] Distributed Firewall Sections: 23 of 10000 (0.23%)\n[OK] [INFO] Groups Based on IP Sets: 37 of 10000 (0.37%)\n[OK] [INFO] Edge Clusters: 3 of 160 (1.87%)\n[OK] [INFO] Tier-1 Logical Routers with NAT Enabled: 0 of 4000 (0%)\n[OK] [INFO] System-wide Firewall Sections: 29 of 10000 (0.29%)\n[OK] [INFO] Network Introspection Sections: 1 of 500 (0.2%)\n[OK] [INFO] Groups: 74 of 20000 (0.37%)\n[OK] [INFO] Tier-1 Logical Routers: 4 of 4000 (0.1%)\n[OK] [INFO] IP Sets: 37 of 10000 (0.37%)\n[OK] [INFO] Network Introspection Service Chains: 0 of 24 (0%)\n[OK] [INFO] Network Introspection Service Paths: 0 of 4000 (0%)\n[OK] [INFO] Tier-0 Logical Routers: 2 of 160 (1.25%)\n[OK] [INFO] DHCP Server Instances: 0 of 10000 (0%)\n| number_of_nat_rules=0%;70;100;0;100 number_of_si_rules=0.01%;70;100;0;100 number_of_gi_protected_hosts=0%;70;100;0;100 number_of_prepared_hosts=1.75%;70;100;0;100 number_of_firewall_rules=0.08%;70;100;0;100 
number_of_dhcp_ip_pools=0%;70;100;0;100 number_of_edge_nodes=3.12%;70;100;0;100 number_of_active_directory_domains=0%;70;100;0;100 number_of_vcenter_clusters=3.12%;70;100;0;100 number_of_prefix_list=4%;70;100;0;100 number_of_logical_switches=0.12%;70;100;0;100 number_of_logical_ports=0.58%;70;100;0;100 number_of_active_directory_groups=0%;70;100;0;100 number_of_dfw_rules=0.07%;70;100;0;100 number_of_gi_protected_vms=0%;70;100;0;100 number_of_dfw_sections=0.23%;70;100;0;100 number_of_groups_based_on_ip_sets=0.37%;70;100;0;100 number_of_edge_clusters=1.87%;70;100;0;100 number_of_tier1_with_nat_rule=0%;70;100;0;100 number_of_firewall_sections=0.29%;70;100;0;100 number_of_si_sections=0.2%;70;100;0;100 number_of_nsgroup=0.37%;70;100;0;100 number_of_tier1_routers=0.1%;70;100;0;100 number_of_ipsets=0.37%;70;100;0;100 number_of_si_service_chains=0%;70;100;0;100 number_of_si_service_paths=0%;70;100;0;100 number_of_tier0_routers=1.25%;70;100;0;100 number_of_dhcp_servers=0%;70;100;0;100')
195+
196+
@mock.patch('builtins.print')
197+
@mock.patch('requests.request')
198+
def test_capacity_usage_exclude(self, mock_req, mock_print):
199+
200+
with open('testdata/fixtures/capacity-usage.json') as f:
201+
testdata = json.load(f)
202+
203+
m = mock.MagicMock()
204+
m.status_code = 200
205+
m.json.return_value = testdata
206+
mock_req.return_value = m
207+
208+
c = Client('api', 'username', 'password', logger=None, verify=True, max_age=5)
209+
210+
actual = c.get_capacity_usage(".*").print_and_return()
211+
expected = 0

0 commit comments

Comments
 (0)