Skip to content
This repository was archived by the owner on Dec 8, 2025. It is now read-only.

Commit 1c140bc

Browse files
authored
Merge pull request #72 from IBM/hotfix_processStats
Re-Introducing TOP to fix #71
2 parents 652c339 + 2e789f7 commit 1c140bc

File tree

5 files changed

+98
-8
lines changed

5 files changed

+98
-8
lines changed

python/influx/definitions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,7 @@ def add_table_definitions(cls, database: Database):
10111011
'PID',
10121012
'USER',
10131013
'hostName',
1014+
'collectionType',
10141015
'ssh_type'
10151016
],# time key is capture time
10161017
retention_policy=cls._RP_DAYS_14(),

python/sppConnection/ssh_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def execute_commands(self, commands: List[SshCommand], verbose: bool = False) ->
210210
if(verbose):
211211
LOGGER.info("> connection successfull")
212212

213-
new_command_list = []
213+
new_command_list: List[SshCommand] = []
214214
for ssh_command in commands:
215215

216216
try:

python/sppmon.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
03/29/2021 version 0.13.2 Fixes to typing, reducing error messages and tracking code for NaN bug
5757
06/07/2021 version 0.13.3 Hotfixing version endpoint for SPP 10.1.8.1
5858
06/09/2021 version 0.13.4 Hotfixing storage exception, changing top-level exception handling to reduce the need of further hotfixes
59+
08/06/2021 version 0.13.5 Fixing PS having unintuitive CPU-recording, reintroducing TOP to collect CPU information only
5960
"""
6061
from __future__ import annotations
6162
import functools
@@ -83,7 +84,7 @@
8384
from utils.spp_utils import SppUtils
8485

8586
# Version:
86-
VERSION = "0.13.4 (2021/06/09)"
87+
VERSION = "0.13.5 (2021/08/06)"
8788

8889
# ----------------------------------------------------------------------------
8990
# command line parameter parsing

python/sppmonMethods/ssh.py

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,24 @@ def __init__(self, influx_client: InfluxClient, config_file: Dict[str, Any], ver
149149

150150
# SERVER
151151
# add server later due multiple processes
152-
self.__ps_grep_list = ["mongod", "beam.smp", "java"] # be aware this is double declared below
153-
for grep_name in self.__ps_grep_list:
152+
self.__process_grep_list = ["mongod", "beam.smp", "java"] # be aware this is double declared below
153+
for grep_name in self.__process_grep_list:
154154
self.__client_commands[SshTypes.SERVER].append(
155155
SshCommand(
156156
command=f"ps -o \"%cpu,%mem,comm,rss,vsz,user,pid,etimes\" -p $(pgrep -d',' -f {grep_name}) S -ww",
157157
parse_function=self._parse_ps_cmd,
158158
table_name="processStats"
159159
)
160160
)
161+
# Top commands for CPU Only
162+
for grep_name in self.__process_grep_list:
163+
self.__client_commands[SshTypes.SERVER].append(
164+
SshCommand(
165+
command=f"top -bs -w 512 -n1 -p $(pgrep -d',' -f {grep_name})",
166+
parse_function=self._parse_top_cmd,
167+
table_name="processStats"
168+
)
169+
)
161170

162171
# ################ END OF SSH COMMAND LIST GROUPS ############################
163172

@@ -221,6 +230,74 @@ def ssh(self) -> None:
221230
ExceptionUtils.exception_info(
222231
error=error, extra_message=f"Top-level-error when excecuting {ssh_type.value} ssh commands, skipping them all")
223232

233+
def _parse_top_cmd(self, ssh_command: SshCommand, ssh_type: SshTypes) -> Tuple[str, List[Dict[str, Any]]]:
    """Parses the result of the `top` command, keeping only the CPU related columns.

    Only rows whose COMMAND is part of the process grep list are kept.
    Memory columns (VIRT, RES, SHR, %MEM) are removed because they are tracked
    by the `ps` command (see Issue #71); PR, NI and S are removed as unused.
    Each remaining row is tagged with `collectionType = "TOP"`, the host name,
    the ssh type and the capture timestamp. `TIME+` is converted into whole seconds.

    Arguments:
        ssh_command {SshCommand} -- command with saved result
        ssh_type {SshTypes} -- type of the client

    Raises:
        ValueError: no command given or no result saved
        ValueError: no ssh type given
        ValueError: no table name saved within the command
        ValueError: result is too short or has no process table header

    Returns:
        Tuple[str, List[Dict[str, Any]]] -- Tuple of the tablename and a insert list
    """

    if(not ssh_command or not ssh_command.result):
        raise ValueError("no command given or empty result")
    if(not ssh_type):
        raise ValueError("no sshtype given")
    if(not ssh_command.table_name):
        raise ValueError("need table name to insert parsed value")

    # The column header is expected on the 7th line.
    # All lines above it are pruned, this data is tracked via ps (see Issue #71)
    header_index = 6

    result_lines = ssh_command.result.splitlines()
    if(len(result_lines) <= header_index):
        # raise a ValueError instead of an IndexError: parsing errors are reported as ValueError
        raise ValueError("top result is too short, no process table found")

    header = result_lines[header_index].split()
    if("COMMAND" not in header or "TIME+" not in header):
        raise ValueError("top result has no valid process table header")

    rows: List[Dict[str, Any]] = [
        dict(zip(header, line.split()))
        for line in result_lines[header_index + 1:]
    ]

    # remove `top` from commands, it is also tracked.
    # `.get` skips blank or shortened lines instead of failing with a KeyError
    values = [row for row in rows if row.get("COMMAND") in self.__process_grep_list]

    # top prints TIME+ as `minutes:seconds.hundredths`, hundredths are optional
    time_pattern = re.compile(r"(\d+):(\d{2})(?:\.(\d{2}))?")

    # memory is tracked by the ps command (See Issue #71), the rest is unused information
    dropped_columns = ("VIRT", "RES", "SHR", "%MEM", "PR", "NI", "S")

    for row in values:
        for column in dropped_columns:
            row.pop(column, None)

        # Add information
        row["collectionType"] = "TOP"

        # set default needed fields
        row['hostName'] = ssh_command.host_name
        row['ssh_type'] = ssh_type.name
        (time_key, time_value) = SppUtils.get_capture_timestamp_sec()
        row[time_key] = time_value

        # convert the cpu time into whole seconds, hundredths are truncated
        match = time_pattern.match(row['TIME+'])
        if(match):
            (minutes, seconds, _hundredths) = match.groups()
            cpu_seconds = int(minutes) * 60 + int(seconds)
        else:
            # unknown (e.g. scaled) format, save no value instead of a wrong one
            cpu_seconds = None
        row['TIME+'] = cpu_seconds

    return (ssh_command.table_name, values)
300+
224301
def _parse_ps_cmd(self, ssh_command: SshCommand, ssh_type: SshTypes) -> Tuple[str, List[Dict[str, Any]]]:
225302
"""Parses the result of the `ps` command, splitting it into its parts.
226303
@@ -246,10 +323,15 @@ def _parse_ps_cmd(self, ssh_command: SshCommand, ssh_type: SshTypes) -> Tuple[st
246323
values: List[Dict[str, Any]] = list(
247324
map(lambda row: dict(zip(header, row.split())), result_lines[1:])) # type: ignore
248325

249-
# remove top statistic itself to avoid spam with useless information
250-
values = list(filter(lambda row: row["COMMAND"] in self.__ps_grep_list, values))
326+
# remove `ps` from commands, it is also tracked
327+
values = list(filter(lambda row: row["COMMAND"] in self.__process_grep_list, values))
251328

252329
for row in values:
330+
# Remove CPU, it is tracked by TOP-Command (see Issue #71)
331+
row.pop("%CPU", None)
332+
# Add information
333+
row["collectionType"] = "PS"
334+
253335
# set default needed fields
254336
row['hostName'] = ssh_command.host_name
255337
row['ssh_type'] = ssh_type.name

python/utils/methods_utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ def ssh_execute_commands(cls, ssh_clients: List[SshClient], ssh_type: SshTypes,
5959
LOGGER.info(f"No {ssh_type.name} ssh client present. Aborting command")
6060
return []
6161

62-
ssh_cmd_response_list = []
62+
# List to persist ssh-result stats over each client
63+
ssh_cmd_response_list: List[Dict[str,Union[str, int, None]]] = []
64+
# list to insert into influx, tuple of table and its result-lists
6365
result_list: List[Tuple[str, List[Dict[str, Any]]]] = []
6466
for client in client_list:
6567

@@ -77,7 +79,8 @@ def ssh_execute_commands(cls, ssh_clients: List[SshClient], ssh_type: SshTypes,
7779
continue
7880

7981
for ssh_command in result_commands:
80-
insert_dict = {}
82+
# generate stats for the ssh-stats list
83+
insert_dict: Dict[str, Union[str, int, None]] = {}
8184
insert_dict["host"] = ssh_command.host_name
8285
insert_dict["command"] = ssh_command.cmd
8386
insert_dict["output"] = json.dumps(ssh_command.result)
@@ -87,13 +90,16 @@ def ssh_execute_commands(cls, ssh_clients: List[SshClient], ssh_type: SshTypes,
8790

8891
ssh_cmd_response_list.append(insert_dict)
8992

93+
# execute the command
9094
try:
9195
table_result_tuple = ssh_command.parse_result(ssh_type=ssh_type)
9296
if(table_result_tuple):
97+
# save the command into the result set with its table
9398
result_list.append(table_result_tuple)
9499
except ValueError as error:
95100
ExceptionUtils.exception_info(error=error, extra_message="Error when parsing result, skipping parsing of this result")
96101

102+
# append the ssh command once, cause each client already added into the ssh_command list
97103
result_list.append(("sshCmdResponse", ssh_cmd_response_list))
98104
return result_list
99105

0 commit comments

Comments
 (0)