Skip to content

Commit ea2b445

Browse files
committed
fix: only use psutil for memory reporting
1 parent: ff07c21 · commit: ea2b445

File tree

1 file changed

+8
-36
lines changed
  • src/DIRAC/WorkloadManagementSystem/JobWrapper

1 file changed

+8
-36
lines changed

src/DIRAC/WorkloadManagementSystem/JobWrapper/Watchdog.py

Lines changed: 8 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,3 @@
1-
########################################################################
2-
# File : Watchdog.py
3-
# Author: Stuart Paterson
4-
########################################################################
5-
61
""" The Watchdog class is used by the Job Wrapper to resolve and monitor
72
the system resource consumption. The Watchdog can determine if
83
a running job is stalled and indicate this to the Job Wrapper.
@@ -22,7 +17,6 @@
2217
import math
2318
import os
2419
import re
25-
import resource
2620
import socket
2721
import time
2822
from pathlib import Path
@@ -287,12 +281,14 @@ def _performChecks(self):
287281
self.parameters["LoadAverage"] = []
288282
self.parameters["LoadAverage"].append(loadAvg)
289283

290-
memoryUsed = self.getMemoryUsed()
291-
msg += f"MemUsed: {memoryUsed:.1f} kb "
292-
heartBeatDict["MemoryUsed"] = memoryUsed
284+
result = self.profiler.memoryUsage(withChildren=True)
285+
if not result["OK"]:
286+
self.log.warn("Could not get rss info from profiler", result["Message"])
287+
msg += f"MemUsed: {result['Value']:.1f} kb "
288+
heartBeatDict["MemoryUsed"] = result["Value"]
293289
if "MemoryUsed" not in self.parameters:
294290
self.parameters["MemoryUsed"] = []
295-
self.parameters["MemoryUsed"].append(memoryUsed)
291+
self.parameters["MemoryUsed"].append(result["Value"])
296292

297293
result = self.profiler.vSizeUsage(withChildren=True)
298294
if not result["OK"]:
@@ -304,16 +300,6 @@ def _performChecks(self):
304300
self.parameters["Vsize"].append(vsize)
305301
msg += f"Job Vsize: {vsize:.1f} kb "
306302

307-
result = self.profiler.memoryUsage(withChildren=True)
308-
if not result["OK"]:
309-
self.log.warn("Could not get rss info from profiler", result["Message"])
310-
else:
311-
rss = result["Value"] * 1024.0
312-
heartBeatDict["RSS"] = rss
313-
self.parameters.setdefault("RSS", [])
314-
self.parameters["RSS"].append(rss)
315-
msg += f"Job RSS: {rss:.1f} kb "
316-
317303
if "DiskSpace" not in self.parameters:
318304
self.parameters["DiskSpace"] = []
319305

@@ -744,11 +730,6 @@ def calibrate(self):
744730
self.initialValues["LoadAverage"] = float(os.getloadavg()[0])
745731
self.parameters["LoadAverage"] = []
746732

747-
memUsed = self.getMemoryUsed()
748-
749-
self.initialValues["MemoryUsed"] = memUsed
750-
self.parameters["MemoryUsed"] = []
751-
752733
result = self.profiler.vSizeUsage(withChildren=True)
753734
if not result["OK"]:
754735
self.log.warn("Could not get vSize info from profiler", result["Message"])
@@ -762,9 +743,8 @@ def calibrate(self):
762743
if not result["OK"]:
763744
self.log.warn("Could not get rss info from profiler", result["Message"])
764745
else:
765-
rss = result["Value"] * 1024.0
766-
self.initialValues["RSS"] = rss
767-
self.log.verbose("RSS(kb)", f"{rss:.1f}")
746+
self.initialValues["RSS"] = result["Value"]
747+
self.log.verbose("RSS(mb)", f"{result['Value']:.1f}")
768748
self.parameters["RSS"] = []
769749

770750
# We exclude fuse so that mountpoints can be cleaned up by automount after a period unused
@@ -968,14 +948,6 @@ def getNodeInformation(self):
968948

969949
return result
970950

971-
#############################################################################
972-
def getMemoryUsed(self):
973-
"""Obtains the memory used."""
974-
mem = (
975-
resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss
976-
)
977-
return float(mem)
978-
979951
#############################################################################
980952
def getDiskSpace(self, exclude=None):
981953
"""Obtains the available disk space."""

0 commit comments

Comments
 (0)