Skip to content

Commit 098c6ba

Browse files
committed
feat (Tornado Monitoring): handlers report to Activity Monitoring
1 parent 2aab00b commit 098c6ba

File tree

2 files changed

+44
-7
lines changed

2 files changed

+44
-7
lines changed

src/DIRAC/Core/Tornado/Server/TornadoServer.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,7 @@ def startTornado(self):
211211
self.__report = self.__startReportToMonitoringLoop()
212212
# Response time
213213
# Starting monitoring, IOLoop waiting time in ms, __monitoringLoopDelay is defined in seconds
214-
tornado.ioloop.PeriodicCallback(
215-
self.__reportToMonitoring(self.__elapsedTime), self.__monitoringLoopDelay * 1000
216-
).start()
214+
tornado.ioloop.PeriodicCallback(self.__reportToMonitoring, self.__monitoringLoopDelay * 1000).start()
217215

218216
for port, app in self.__appsSettings.items():
219217
sLog.debug(" - %s" % "\n - ".join([f"{k} = {ssl_options[k]}" for k in ssl_options]))
@@ -235,7 +233,7 @@ def startTornado(self):
235233

236234
tornado.ioloop.IOLoop.current().start()
237235

238-
def __reportToMonitoring(self, responseTime):
236+
def __reportToMonitoring(self):
239237
"""
240238
Periodically reports to Monitoring
241239
"""
@@ -250,13 +248,24 @@ def __reportToMonitoring(self, responseTime):
250248
"ServiceName": "Tornado",
251249
"MemoryUsage": self.__report[2],
252250
"CpuPercentage": percentage,
253-
"ResponseTime": responseTime,
254251
}
255252
)
256253
self.activityMonitoringReporter.commit()
257254
# Save memory usage and save realtime/CPU time for next call
258255
self.__report = self.__startReportToMonitoringLoop()
259256

257+
# For each handler,
258+
for urlSpec in self.handlerManager.getHandlersDict().values():
259+
# If there is more than one URL, it's
260+
# most likely something that inherit from TornadoREST
261+
# so don't even try to monitor...
262+
if len(urlSpec["URLs"]) > 1:
263+
continue
264+
handler = urlSpec["URLs"][0].handler_class
265+
# If there is a Monitoring reporter, call commit on it
266+
if getattr(handler, "activityMonitoringReporter", None):
267+
handler.activityMonitoringReporter.commit()
268+
260269
def __startReportToMonitoringLoop(self):
261270
"""
262271
Snapshot of resources to be taken at the beginning

src/DIRAC/Core/Tornado/Server/private/BaseRequestHandler.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
import DIRAC
2121

2222
from DIRAC import gConfig, gLogger, S_OK, S_ERROR
23+
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
2324
from DIRAC.Core.Utilities import DErrno
2425
from DIRAC.Core.DISET.AuthManager import AuthManager
2526
from DIRAC.Core.Utilities.JEncode import decode, encode
27+
from DIRAC.Core.Utilities import Network, TimeUtilities
2628
from DIRAC.Core.Utilities.ReturnValues import isReturnStructure
2729
from DIRAC.Core.Security.X509Chain import X509Chain # pylint: disable=import-error
2830
from DIRAC.Resources.IdProvider.Utilities import getProvidersForInstance
@@ -475,6 +477,12 @@ def __initialize(cls, request):
475477

476478
cls.initializeHandler(cls._componentInfoDict)
477479

480+
cls.activityMonitoringReporter = None
481+
if "Monitoring" in Operations().getMonitoringBackends(monitoringType="ServiceMonitoring"):
482+
from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter
483+
484+
cls.activityMonitoringReporter = MonitoringReporter(monitoringType="ServiceMonitoring")
485+
478486
cls.__init_done = True
479487

480488
return S_OK()
@@ -662,26 +670,46 @@ def on_finish(self):
662670
Called after the end of HTTP request.
663671
Log the request duration
664672
"""
665-
elapsedTime = 1000.0 * self.request.request_time()
673+
elapsedTime = self.request.request_time()
674+
666675
credentials = self.srv_getFormattedRemoteCredentials()
667676

668677
argsString = f"OK {self._status_code}"
678+
monitoringRetStatus = "Unknown"
669679
# Finish with DIRAC result
670680
if isReturnStructure(self.__result):
671681
if self.__result["OK"]:
672682
argsString = "OK"
683+
monitoringRetStatus = "OK"
673684
else:
674685
argsString = f"ERROR: {self.__result['Message']}"
686+
monitoringRetStatus = "ERROR"
675687
if callStack := self.__result.pop("CallStack", None):
676688
argsString += "\n" + "".join(callStack)
677689
# If bad HTTP status code
678690
if self._status_code >= 400:
679691
argsString = f"ERROR {self._status_code}: {self._reason}"
680692

681693
self.log.notice(
682-
"Returning response", f"{credentials} {self._fullComponentName} ({elapsedTime:.2f} ms) {argsString}"
694+
"Returning response",
695+
f"{credentials} {self._fullComponentName} ({1000.0 * elapsedTime:.2f} ms) {argsString}",
683696
)
684697

698+
if self.activityMonitoringReporter:
699+
record = {
700+
"timestamp": int(TimeUtilities.toEpochMilliSeconds()),
701+
"Host": Network.getFQDN(),
702+
"ServiceName": "_".join(self._fullComponentName.split("/")),
703+
"Location": self.request.uri,
704+
"ResponseTime": elapsedTime,
705+
# Take the method name from the POST call
706+
"MethodName": self.request.arguments.get("method", ["Unknown"])[0],
707+
"Protocol": "https",
708+
"Status": monitoringRetStatus,
709+
}
710+
711+
self.activityMonitoringReporter.addRecord(record)
712+
685713
def _gatherPeerCredentials(self, grants: list = None) -> dict:
686714
"""Return a dictionary designed to work with the :py:class:`AuthManager <DIRAC.Core.DISET.AuthManager.AuthManager>`,
687715
already written for DISET and re-used for HTTPS.

0 commit comments

Comments
 (0)