Skip to content
This repository was archived by the owner on Dec 8, 2025. It is now read-only.

Commit 652c339

Browse files
Merge pull request #68 from IBM/develop
* Changed top-level exception catching from catching only our own ValueErrors to catching any exception, preventing a whole abort of SPPMon if something unexpected happens. This will reduce the need for urgent hotfixes like this one. * Changed typings of critical components to support better linting - Hotfixes the SPPMon storages request failing due to `free` or `total` being None. - Changes empty-result severity of REST requests from error to info
2 parents 1402f0b + 8a31b8f commit 652c339

File tree

4 files changed

+50
-38
lines changed

4 files changed

+50
-38
lines changed

python/sppmon.py

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
02/10/2021 version 0.13.1 Fixes to partial send(influx), including influxdb version into stats
5656
03/29/2021 version 0.13.2 Fixes to typing, reducing error messages and tracking code for NaN bug
5757
06/07/2021 version 0.13.3 Hotfixing version endpoint for SPP 10.1.8.1
58+
06/09/2021 version 0.13.4 Hotfixing storage exception, changing top-level exception handling to reduce the need for further hotfixes
5859
"""
5960
from __future__ import annotations
6061
import functools
@@ -66,7 +67,7 @@
6667
from subprocess import CalledProcessError
6768
import sys
6869
import time
69-
from typing import Any, Dict, List, NoReturn, Union
70+
from typing import Any, Dict, List, NoReturn, Union, Optional
7071

7172
from influx.influx_client import InfluxClient
7273
from sppConnection.api_queries import ApiQueries
@@ -82,7 +83,7 @@
8283
from utils.spp_utils import SppUtils
8384

8485
# Version:
85-
VERSION = "0.13.3 (2021/06/07)"
86+
VERSION = "0.13.4 (2021/06/09)"
8687

8788
# ----------------------------------------------------------------------------
8889
# command line parameter parsing
@@ -145,7 +146,8 @@
145146
LOGGER_NAME = 'sppmon'
146147
LOGGER = logging.getLogger(LOGGER_NAME)
147148

148-
ERROR_CODE_CMD_LINE = 2
149+
ERROR_CODE_START_ERROR = 3
150+
ERROR_CODE_CMD_ARGS = 2
149151
ERROR_CODE = 1
150152

151153

@@ -261,13 +263,13 @@ class SppMon:
261263
"""Configured spp log rentation time, logs get deleted after this time."""
262264

263265
# set later in each method, here to avoid missing attribute
264-
influx_client: InfluxClient = None
265-
rest_client: RestClient = None
266-
api_queries: ApiQueries = None
267-
system_methods: SystemMethods = None
268-
job_methods: JobMethods = None
269-
protection_methods: ProtectionMethods = None
270-
ssh_methods: SshMethods = None
266+
influx_client: Optional[InfluxClient] = None
267+
rest_client: Optional[RestClient] = None
268+
api_queries: Optional[ApiQueries] = None
269+
system_methods: Optional[SystemMethods] = None
270+
job_methods: Optional[JobMethods] = None
271+
protection_methods: Optional[ProtectionMethods] = None
272+
ssh_methods: Optional[SshMethods] = None
271273

272274
def __init__(self):
273275
self.log_path: str = ""
@@ -280,7 +282,7 @@ def __init__(self):
280282
LOGGER.info("Starting SPPMon")
281283
if(not self.check_pid_file()):
282284
ExceptionUtils.error_message("Another instance of sppmon with the same args is running")
283-
self.exit(ERROR_CODE_CMD_LINE)
285+
self.exit(ERROR_CODE_START_ERROR)
284286

285287
# everything is option, otherwise its a typo.
286288
if(len(ARGS) > 0):
@@ -296,12 +298,12 @@ def __init__(self):
296298

297299
if(not OPTIONS.confFileJSON):
298300
ExceptionUtils.error_message("missing config file, aborting")
299-
self.exit(error_code=ERROR_CODE_CMD_LINE)
301+
self.exit(error_code=ERROR_CODE_CMD_ARGS)
300302
try:
301303
self.config_file = SppUtils.read_conf_file(config_file_path=OPTIONS.confFileJSON)
302304
except ValueError as error:
303305
ExceptionUtils.exception_info(error=error, extra_message="Error when trying to read Config file, unable to read")
304-
self.exit(error_code=ERROR_CODE_CMD_LINE)
306+
self.exit(error_code=ERROR_CODE_START_ERROR)
305307

306308
LOGGER.info("Setting up configurations")
307309
self.setup_args()
@@ -422,7 +424,7 @@ def set_critial_configs(self, config_file: Dict[str, Any]) -> None:
422424
"""
423425
if(not config_file):
424426
ExceptionUtils.error_message("missing or empty config file, aborting")
425-
self.exit(error_code=ERROR_CODE_CMD_LINE)
427+
self.exit(error_code=ERROR_CODE_START_ERROR)
426428
try:
427429
# critical components only
428430
self.influx_client = InfluxClient(config_file)
@@ -446,7 +448,10 @@ def set_optional_configs(self, config_file: Dict[str, Any]) -> None:
446448

447449
if(not config_file):
448450
ExceptionUtils.error_message("missing or empty config file, aborting.")
449-
self.exit(error_code=ERROR_CODE_CMD_LINE)
451+
self.exit(error_code=ERROR_CODE_START_ERROR)
452+
if(not self.influx_client):
453+
ExceptionUtils.error_message("Influx client is somehow missing. aborting")
454+
self.exit(error_code=ERROR_CODE)
450455

451456
# ############################ REST-API #####################################
452457
try:
@@ -571,7 +576,7 @@ def setup_args(self) -> None:
571576
if((OPTIONS.create_dashboard or bool(OPTIONS.dashboard_folder_path)) and not
572577
(OPTIONS.create_dashboard and bool(OPTIONS.dashboard_folder_path))):
573578
ExceptionUtils.error_message("> Using --create_dashboard without associated folder path. Aborting.")
574-
self.exit(ERROR_CODE_CMD_LINE)
579+
self.exit(ERROR_CODE_CMD_ARGS)
575580

576581
# incremental setup, higher executes all below
577582
all_args: bool = OPTIONS.all
@@ -677,12 +682,15 @@ def exit(self, error_code: int = False) -> NoReturn:
677682

678683
# error with the command line arguments
679684
# dont store runtime here
680-
if(error_code == ERROR_CODE_CMD_LINE):
685+
if(error_code == ERROR_CODE_CMD_ARGS):
681686
prog_args = []
682687
prog_args.append(sys.argv[0])
683688
prog_args.append("--help")
684689
os.execv(sys.executable, ['python'] + prog_args)
685-
sys.exit(ERROR_CODE_CMD_LINE) # unreachable?
690+
sys.exit(ERROR_CODE_CMD_ARGS) # unreachable?
691+
if(error_code == ERROR_CODE_START_ERROR):
692+
ExceptionUtils.error_message("Error when starting SPPMon. Please review the errors above")
693+
sys.exit(ERROR_CODE_START_ERROR)
686694

687695
script_end_time = SppUtils.get_actual_time_sec()
688696
LOGGER.debug("Script end time: %d", script_end_time)
@@ -725,7 +733,7 @@ def main(self):
725733
try:
726734
self.system_methods.sites()
727735
self.influx_client.flush_insert_buffer()
728-
except ValueError as error:
736+
except Exception as error:
729737
ExceptionUtils.exception_info(
730738
error=error,
731739
extra_message="Top-level-error when requesting sites, skipping them all")
@@ -734,7 +742,7 @@ def main(self):
734742
try:
735743
self.system_methods.cpuram()
736744
self.influx_client.flush_insert_buffer()
737-
except ValueError as error:
745+
except Exception as error:
738746
ExceptionUtils.exception_info(
739747
error=error,
740748
extra_message="Top-level-error when collecting cpu stats, skipping them all")
@@ -743,7 +751,7 @@ def main(self):
743751
try:
744752
self.system_methods.sppcatalog()
745753
self.influx_client.flush_insert_buffer()
746-
except ValueError as error:
754+
except Exception as error:
747755
ExceptionUtils.exception_info(
748756
error=error,
749757
extra_message="Top-level-error when collecting file system stats, skipping them all")
@@ -754,7 +762,7 @@ def main(self):
754762
try:
755763
self.job_methods.get_all_jobs()
756764
self.influx_client.flush_insert_buffer()
757-
except ValueError as error:
765+
except Exception as error:
758766
ExceptionUtils.exception_info(
759767
error=error,
760768
extra_message="Top-level-error when requesting jobs, skipping them all")
@@ -764,7 +772,7 @@ def main(self):
764772
try:
765773
self.job_methods.job_logs()
766774
self.influx_client.flush_insert_buffer()
767-
except ValueError as error:
775+
except Exception as error:
768776
ExceptionUtils.exception_info(
769777
error=error,
770778
extra_message="Top-level-error when requesting job logs, skipping them all")
@@ -776,7 +784,7 @@ def main(self):
776784
try:
777785
self.ssh_methods.ssh()
778786
self.influx_client.flush_insert_buffer()
779-
except ValueError as error:
787+
except Exception as error:
780788
ExceptionUtils.exception_info(
781789
error=error,
782790
extra_message="Top-level-error when excecuting ssh commands, skipping them all")
@@ -786,7 +794,7 @@ def main(self):
786794
try:
787795
self.protection_methods.store_vms()
788796
self.influx_client.flush_insert_buffer()
789-
except ValueError as error:
797+
except Exception as error:
790798
ExceptionUtils.exception_info(
791799
error=error,
792800
extra_message="Top-level-error when requesting all VMs, skipping them all")
@@ -797,7 +805,7 @@ def main(self):
797805
self.protection_methods.vms_per_sla()
798806
self.protection_methods.sla_dumps()
799807
self.influx_client.flush_insert_buffer()
800-
except ValueError as error:
808+
except Exception as error:
801809
ExceptionUtils.exception_info(
802810
error=error,
803811
extra_message="Top-level-error when requesting and computing VMs per sla, skipping them all")
@@ -807,7 +815,7 @@ def main(self):
807815
try:
808816
self.protection_methods.create_inventory_summary()
809817
self.influx_client.flush_insert_buffer()
810-
except ValueError as error:
818+
except Exception as error:
811819
ExceptionUtils.exception_info(
812820
error=error,
813821
extra_message="Top-level-error when creating inventory summary, skipping them all")
@@ -816,7 +824,7 @@ def main(self):
816824
try:
817825
self.protection_methods.vadps()
818826
self.influx_client.flush_insert_buffer()
819-
except ValueError as error:
827+
except Exception as error:
820828
ExceptionUtils.exception_info(
821829
error=error,
822830
extra_message="Top-level-error when requesting vadps, skipping them all")
@@ -825,7 +833,7 @@ def main(self):
825833
try:
826834
self.protection_methods.storages()
827835
self.influx_client.flush_insert_buffer()
828-
except ValueError as error:
836+
except Exception as error:
829837
ExceptionUtils.exception_info(
830838
error=error,
831839
extra_message="Top-level-error when collecting storages, skipping them all")
@@ -835,7 +843,7 @@ def main(self):
835843
if(OPTIONS.copy_database):
836844
try:
837845
self.influx_client.copy_database(OPTIONS.copy_database)
838-
except ValueError as error:
846+
except Exception as error:
839847
ExceptionUtils.exception_info(
840848
error=error,
841849
extra_message="Top-level-error when coping database.")
@@ -845,7 +853,7 @@ def main(self):
845853
if(OPTIONS.test):
846854
try:
847855
OtherMethods.test_connection(self.influx_client, self.rest_client, self.config_file)
848-
except ValueError as error:
856+
except Exception as error:
849857
ExceptionUtils.exception_info(
850858
error=error,
851859
extra_message="Top-level-error when testing connection.")
@@ -859,7 +867,7 @@ def main(self):
859867
OtherMethods.create_dashboard(
860868
dashboard_folder_path=OPTIONS.dashboard_folder_path,
861869
database_name=self.influx_client.database.name)
862-
except ValueError as error:
870+
except Exception as error:
863871
ExceptionUtils.exception_info(
864872
error=error,
865873
extra_message="Top-level-error when creating dashboard")

python/sppmonMethods/other.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from sppmonMethods.ssh import SshMethods
1010
from utils.methods_utils import MethodUtils
1111
from influx.influx_client import InfluxClient
12-
from typing import Dict, Any, List
12+
from typing import Dict, Any, List, Optional
1313
import logging
1414
import os
1515
import re
@@ -22,7 +22,7 @@
2222
class OtherMethods:
2323

2424
@staticmethod
25-
def test_connection(influx_client: InfluxClient, rest_client: RestClient, config_file: Dict[str, Any]):
25+
def test_connection(influx_client: InfluxClient, rest_client: Optional[RestClient], config_file: Dict[str, Any]):
2626
if(not config_file):
2727
raise ValueError("SPPmon does not work without a config file")
2828

@@ -49,6 +49,8 @@ def test_connection(influx_client: InfluxClient, rest_client: RestClient, config
4949

5050
LOGGER.info("> Testing REST-API of SPP.")
5151
try:
52+
if(not rest_client):
53+
raise ValueError("Rest-client is setup. Unavailable to test it.")
5254
rest_client.login()
5355
(version_nr, build_nr) = rest_client.get_spp_version_build()
5456
LOGGER.info(f">> Sucessfully connected to SPP V{version_nr}, build {build_nr}.")

python/sppmonMethods/protection.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,10 @@ def storages(self) -> None:
123123
# get calulated extra info
124124
for row in result:
125125
row['siteName'] = self.__system_methods.site_name_by_id(row['site'])
126-
if('free' in row and 'total' in row
127-
and row['free'] > 0 and row['total'] > 0):
126+
if('free' in row and row['free'] != None and
127+
'total' in row and row['total'] != None and
128+
row['total'] > 0):
129+
128130
row['used'] = row['total'] - row['free']
129131
row['pct_free'] = row['free'] / row['total'] * 100
130132
row['pct_used'] = row['used'] / row['total'] * 100

python/utils/methods_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,10 @@ def query_something(
132132

133133
LOGGER.info("> getting %s", name)
134134

135-
# request all Sites from SPP
135+
# request information from SPP via the api_queries.py file
136136
elem_list = source_func()
137137
if(not elem_list):
138-
ExceptionUtils.error_message(f">> No {name} are found")
138+
LOGGER.info(f"WARNING: No {name} are returned when requesting from server")
139139

140140
if(rename_tuples):
141141
for elem in elem_list:

0 commit comments

Comments
 (0)