Skip to content

Commit 15bbfa0

Browse files
committed
docs: Updated documentation for PilotsHistory
1 parent 9c5a319 commit 15bbfa0

File tree

3 files changed

+20
-12
lines changed

3 files changed

+20
-12
lines changed

docs/source/AdministratorGuide/Systems/MonitoringSystem/index.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ Monitoring System
1010
Overview
1111
=========
1212

13-
The Monitoring system is used to monitor various components of DIRAC. Currently, we have five monitoring types:
13+
The Monitoring system is used to monitor various components of DIRAC. Currently, we have several monitoring types:
1414

1515
- WMSHistory: for monitoring the DIRAC WMS
16+
- PilotsHistory: for monitoring the DIRAC pilots
1617
- Component Monitoring: for monitoring DIRAC components such as services, agents, etc.
1718
- RMS Monitoring: for monitoring the DIRAC RequestManagement System (mostly the Request Executing Agent).
1819
- PilotSubmission Monitoring: for monitoring the DIRAC pilot submission statistics from SiteDirector agents
@@ -122,6 +123,11 @@ You can configure the MQ in the local dirac.cfg file where the agent is running:
122123
Note: the JSON file already contains the index patterns needed for the visualizations. You may need to adapt the index patterns to your existing ones.
123124

124125

126+
Enable PilotsHistory monitoring
127+
===============================
128+
To enable PilotsHistory monitoring, set the flag ``monitoringEnabled = True`` in Operations/Defaults.
129+
130+
125131
Enable Component monitoring
126132
===========================
127133

src/DIRAC/MonitoringSystem/Client/Types/PilotsHistory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def __init__(self):
1919

2020
self.keyFields = ["TaskQueueID", "GridSite", "GridType", "Status"]
2121

22-
self.monitoringFields = ["Pilots"]
22+
self.monitoringFields = ["NumOfPilots"]
2323

2424
self.index = "pilotshistory_index"
2525

@@ -29,7 +29,7 @@ def __init__(self):
2929
"GridSite": {"type": "keyword"},
3030
"GridType": {"type": "keyword"},
3131
"Status": {"type": "keyword"},
32-
"Pilots": {"type": "long"},
32+
"NumOfPilots": {"type": "long"},
3333
}
3434
)
3535

src/DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ class StatesAccountingAgent(AgentModule):
4242

4343
# PilotsHistory fields
4444
__pilotKeyFields = ["TaskQueueID", "GridSite", "GridType", "Status"]
45-
__pilotValueFields = ["Pilots"]
45+
__pilotValueFields = ["NumOfPilots"]
4646

4747
def initialize(self):
4848
"""Standard initialization"""
4949
# This agent will always loop every 15 minutes
5050
self.am_setOption("PollingTime", 900)
5151

5252
self.backends = self.am_getOption("Backends", "Accounting").replace(" ", "").split(",")
53-
self.monitoringEnabled = Operations().getValue("monitoringEnabled", "False")
53+
self.monitoringEnabled = Operations().getValue("monitoringEnabled", False)
5454

5555
messageQueue = self.am_getOption("MessageQueue", "dirac.wmshistory")
5656

@@ -62,7 +62,7 @@ def initialize(self):
6262
self.datastores["Monitoring"] = MonitoringReporter(
6363
monitoringType="WMSHistory", failoverQueueName=messageQueue
6464
)
65-
self.pilotReporter = MonitoringReporter(monitoringType="PilotsHistory")
65+
self.pilotReporter = MonitoringReporter(monitoringType="PilotsHistory", failoverQueueName=messageQueue)
6666

6767
self.__jobDBFields = []
6868
for field in self.__summaryKeyFieldsMapping:
@@ -77,32 +77,34 @@ def execute(self):
7777
"""Main execution method"""
7878

7979
# PilotsHistory to Monitoring
80+
self.log.info("Committing PilotsHistory to Monitoring")
8081
if self.monitoringEnabled:
8182
result = PilotAgentsDB.getSummarySnapshot(self.__pilotKeyFields)
8283
now = Time.dateTime()
8384
if not result["OK"]:
8485
self.log.error(
85-
"Can't get the PilotAgentsDB summary", "%s: won't commit at this cycle" % result["Message"]
86+
"Can't get the PilotAgentsDB summary",
87+
"%s: won't commit PilotsHistory at this cycle" % result["Message"],
8688
)
8789
return S_ERROR()
8890

8991
values = result["Value"][1]
9092
for record in values:
9193
record = record[1:]
9294
rD = {}
93-
for iP in range(len(self.__pilotKeyFields)):
95+
for iP in enumerate(self.__pilotKeyFields):
9496
rD[self.__pilotKeyFields[iP]] = record[iP]
9597
record = record[len(self.__pilotKeyFields) :]
96-
for iP in range(len(self.__pilotValueFields)):
98+
for iP in enumerate(self.__pilotValueFields):
9799
rD[self.__pilotValueFields[iP]] = int(record[iP])
98100
rD["timestamp"] = int(Time.toEpoch(now))
99-
self.log.verbose("Adding following PilotsHistory record to Reporter: \n", rD)
101+
self.log.debug("Adding following PilotsHistory record to Reporter: \n", rD)
100102
self.pilotReporter.addRecord(rD)
101103

102-
self.log.info("Committing PilotsHistory to Monitoring")
104+
self.log.info("Committing to Monitoring...")
103105
result = self.pilotReporter.commit()
104106
if not result["OK"]:
105-
self.log.error("Could not commit PilotsHistory to Monitoring")
107+
self.log.error("Could not commit to Monitoring")
106108
return S_ERROR()
107109
self.log.verbose("Done committing PilotsHistory to Monitoring")
108110

0 commit comments

Comments
 (0)