Skip to content

Commit d993b05

Browse files
authored
Merge branch 'main' into add_multiple_vserver_support
2 parents 53209a8 + 2d2e871 commit d993b05

File tree

2 files changed

+197
-173
lines changed

2 files changed

+197
-173
lines changed

Monitoring/auto-add-cw-alarms/auto_add_cw_alarms.py

Lines changed: 98 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -452,92 +452,104 @@ def lambda_handler(event, context):
452452
for region in regions:
453453
if region in fsxRegions:
454454
print(f'Scanning {region}')
455-
fsx = boto3.client('fsx', region_name=region, config=boto3Config)
456-
cw = boto3.client('cloudwatch', region_name=region, config=boto3Config)
457-
#
458-
# Get all the file systems, volumes and alarms in the region.
459-
fss = getFss(fsx)
460-
volumes = getVolumes(fsx)
461-
alarms = getAlarms(cw)
462-
#
463-
# Scan for filesystems without CPU Utilization Alarm.
464-
for fs in fss:
465-
if(fs['FileSystemType'] == "ONTAP"):
466-
threshold = int(getCPUAlarmThresholdTagValue(fs['Tags']))
467-
if(threshold != 100):
468-
fsId = fs['FileSystemId']
469-
fsName = fsId.replace('fs-', 'FsxId')
470-
alarmName = alarmPrefixCPU + fsId
471-
alarmDescription = f"CPU utilization alarm for file system {fsName}{customerId} in region {region}."
472-
473-
if(not contains_alarm(alarmName, alarms) and onlyFilesystemId == None or
474-
not contains_alarm(alarmName, alarms) and onlyFilesystemId != None and onlyFilesystemId == fsId):
475-
print(f'Adding CPU Alarm for {fs["FileSystemId"]}')
476-
add_cpu_alarm(cw, fsId, alarmName, alarmDescription, threshold, region)
477-
#
478-
# Scan for CPU alarms without a FSxN filesystem.
479-
for alarm in alarms:
480-
alarmName = alarm['AlarmName']
481-
if(alarmName[:len(alarmPrefixCPU)] == alarmPrefixCPU):
482-
fsId = alarmName[len(alarmPrefixCPU):]
483-
if(not contains_fs(fsId, fss) and onlyFilesystemId == None or
484-
not contains_fs(fsId, fss) and onlyFilesystemId != None and onlyFilesystemId == fsId):
485-
print("Deleting alarm: " + alarmName + " in region " + region)
486-
delete_alarm(cw, alarmName)
487-
#
488-
# Scan for filesystems without SSD Utilization Alarm.
489-
for fs in fss:
490-
if(fs['FileSystemType'] == "ONTAP"):
491-
threshold = int(getSSDAlarmThresholdTagValue(fs['Tags']))
492-
if(threshold != 100):
493-
fsId = fs['FileSystemId']
494-
fsName = fsId.replace('fs-', 'FsxId')
495-
alarmName = alarmPrefixSSD + fsId
496-
alarmDescription = f"SSD utilization alarm for file system {fsName}{customerId} in region {region}."
497-
498-
if(not contains_alarm(alarmName, alarms) and onlyFilesystemId == None or
499-
not contains_alarm(alarmName, alarms) and onlyFilesystemId != None and onlyFilesystemId == fsId):
500-
print(f'Adding SSD Alarm for {fsId}')
501-
add_ssd_alarm(cw, fs['FileSystemId'], alarmName, alarmDescription, threshold, region)
502-
#
503-
# Scan for SSD alarms without a FSxN filesystem.
504-
for alarm in alarms:
505-
alarmName = alarm['AlarmName']
506-
if(alarmName[:len(alarmPrefixSSD)] == alarmPrefixSSD):
507-
fsId = alarmName[len(alarmPrefixSSD):]
508-
if(not contains_fs(fsId, fss) and onlyFilesystemId == None or
509-
not contains_fs(fsId, fss) and onlyFilesystemId != None and onlyFilesystemId == fsId):
510-
print("Deleteing alarm: " + alarmName + " in region " + region)
511-
delete_alarm(cw, alarmName)
512-
#
513-
# Scan for volumes without alarms.
514-
for volume in volumes:
515-
if(volume['VolumeType'] == "ONTAP"):
516-
volumeId = volume['VolumeId']
517-
volumeName = volume['Name']
518-
volumeARN = volume['ResourceARN']
519-
fsId = volume['FileSystemId']
520-
521-
threshold = int(getAlarmThresholdTagValue(fsx, volumeARN))
522-
523-
if(threshold != 100): # No alarm if the value is set to 100.
524-
alarmName = alarmPrefixVolume + volumeId
525-
fsName = fsId.replace('fs-', 'FsxId')
526-
alarmDescription = f"Volume utilization alarm for volumeId {volumeId}{customerId}, File System Name: {fsName}, Volume Name: {volumeName} in region {region}."
527-
if(not contains_alarm(alarmName, alarms) and onlyFilesystemId == None or
528-
not contains_alarm(alarmName, alarms) and onlyFilesystemId != None and onlyFilesystemId == fsId):
529-
print(f'Adding volume utilization alarm for {volumeName} in region {region}.')
530-
add_volume_alarm(cw, volumeId, alarmName, alarmDescription, fsId, threshold, region)
531-
#
532-
# Scan for volume alarms without volumes.
533-
for alarm in alarms:
534-
alarmName = alarm['AlarmName']
535-
if(alarmName[:len(alarmPrefixVolume)] == alarmPrefixVolume):
536-
volumeId = alarmName[len(alarmPrefixVolume):]
537-
if(not contains_volume(volumeId, volumes) and onlyFilesystemId == None or
538-
not contains_volume(volumeId, volumes) and onlyFilesystemId != None and onlyFilesystemId == getFileSystemId(alarm)):
539-
print("Deleteing alarm: " + alarmName + " in region " + region)
540-
delete_alarm(cw, alarmName)
455+
try:
456+
fsx = boto3.client('fsx', region_name=region, config=boto3Config)
457+
cw = boto3.client('cloudwatch', region_name=region, config=boto3Config)
458+
#
459+
# Get all the file systems, volumes and alarms in the region.
460+
fss = getFss(fsx)
461+
volumes = getVolumes(fsx)
462+
alarms = getAlarms(cw)
463+
#
464+
# Scan for filesystems without CPU Utilization Alarm.
465+
for fs in fss:
466+
if(fs['FileSystemType'] == "ONTAP"):
467+
threshold = int(getCPUAlarmThresholdTagValue(fs['Tags']))
468+
if(threshold != 100):
469+
fsId = fs['FileSystemId']
470+
fsName = fsId.replace('fs-', 'FsxId')
471+
alarmName = alarmPrefixCPU + fsId
472+
alarmDescription = f"CPU utilization alarm for file system {fsName}{customerId} in region {region}."
473+
474+
if(not contains_alarm(alarmName, alarms) and onlyFilesystemId == None or
475+
not contains_alarm(alarmName, alarms) and onlyFilesystemId != None and onlyFilesystemId == fsId):
476+
print(f'Adding CPU Alarm for {fs["FileSystemId"]}')
477+
add_cpu_alarm(cw, fsId, alarmName, alarmDescription, threshold, region)
478+
#
479+
# Scan for CPU alarms without a FSxN filesystem.
480+
for alarm in alarms:
481+
alarmName = alarm['AlarmName']
482+
if(alarmName[:len(alarmPrefixCPU)] == alarmPrefixCPU):
483+
fsId = alarmName[len(alarmPrefixCPU):]
484+
if(not contains_fs(fsId, fss) and onlyFilesystemId == None or
485+
not contains_fs(fsId, fss) and onlyFilesystemId != None and onlyFilesystemId == fsId):
486+
print("Deleting alarm: " + alarmName + " in region " + region)
487+
delete_alarm(cw, alarmName)
488+
#
489+
# Scan for filesystems without SSD Utilization Alarm.
490+
for fs in fss:
491+
if(fs['FileSystemType'] == "ONTAP"):
492+
threshold = int(getSSDAlarmThresholdTagValue(fs['Tags']))
493+
if(threshold != 100):
494+
fsId = fs['FileSystemId']
495+
fsName = fsId.replace('fs-', 'FsxId')
496+
alarmName = alarmPrefixSSD + fsId
497+
alarmDescription = f"SSD utilization alarm for file system {fsName}{customerId} in region {region}."
498+
499+
if(not contains_alarm(alarmName, alarms) and onlyFilesystemId == None or
500+
not contains_alarm(alarmName, alarms) and onlyFilesystemId != None and onlyFilesystemId == fsId):
501+
print(f'Adding SSD Alarm for {fsId}')
502+
add_ssd_alarm(cw, fs['FileSystemId'], alarmName, alarmDescription, threshold, region)
503+
#
504+
# Scan for SSD alarms without a FSxN filesystem.
505+
for alarm in alarms:
506+
alarmName = alarm['AlarmName']
507+
if(alarmName[:len(alarmPrefixSSD)] == alarmPrefixSSD):
508+
fsId = alarmName[len(alarmPrefixSSD):]
509+
if(not contains_fs(fsId, fss) and onlyFilesystemId == None or
510+
not contains_fs(fsId, fss) and onlyFilesystemId != None and onlyFilesystemId == fsId):
511+
print("Deleteing alarm: " + alarmName + " in region " + region)
512+
delete_alarm(cw, alarmName)
513+
#
514+
# Scan for volumes without alarms.
515+
for volume in volumes:
516+
if(volume['VolumeType'] == "ONTAP"):
517+
volumeId = volume['VolumeId']
518+
volumeName = volume['Name']
519+
volumeARN = volume['ResourceARN']
520+
fsId = volume['FileSystemId']
521+
522+
threshold = int(getAlarmThresholdTagValue(fsx, volumeARN))
523+
524+
if(threshold != 100): # No alarm if the value is set to 100.
525+
alarmName = alarmPrefixVolume + volumeId
526+
fsName = fsId.replace('fs-', 'FsxId')
527+
alarmDescription = f"Volume utilization alarm for volumeId {volumeId}{customerId}, File System Name: {fsName}, Volume Name: {volumeName} in region {region}."
528+
if(not contains_alarm(alarmName, alarms) and onlyFilesystemId == None or
529+
not contains_alarm(alarmName, alarms) and onlyFilesystemId != None and onlyFilesystemId == fsId):
530+
print(f'Adding volume utilization alarm for {volumeName} in region {region}.')
531+
add_volume_alarm(cw, volumeId, alarmName, alarmDescription, fsId, threshold, region)
532+
#
533+
# Scan for volume alarms without volumes.
534+
for alarm in alarms:
535+
alarmName = alarm['AlarmName']
536+
if(alarmName[:len(alarmPrefixVolume)] == alarmPrefixVolume):
537+
volumeId = alarmName[len(alarmPrefixVolume):]
538+
if(not contains_volume(volumeId, volumes) and onlyFilesystemId == None or
539+
not contains_volume(volumeId, volumes) and onlyFilesystemId != None and onlyFilesystemId == getFileSystemId(alarm)):
540+
print("Deleteing alarm: " + alarmName + " in region " + region)
541+
delete_alarm(cw, alarmName)
542+
543+
except botocore.exceptions.ClientError as e:
544+
if e.response['Error']['Code'] == 'ServiceUnavailableException':
545+
print(f"Warning: Service Unavailable fault while scanning {region}. Skipping")
546+
continue
547+
else:
548+
print(f"boto3 client error: {json.dumps(e.response)}")
549+
raise e
550+
except botocore.exceptions.EndpointConnectionError as e:
551+
print(f"Warning: Endpoint Connection fault while scanning {region}. Skipping")
552+
continue
541553

542554
return
543555

0 commit comments

Comments
 (0)