1
- """ StatesAccountingAgent sends periodically numbers of jobs in various states for various
1
+ """ StatesAccountingAgent periodically sends the numbers of jobs and pilots in various states for various
2
2
sites to the Monitoring system to create historical plots.
3
3
4
4
.. literalinclude:: ../ConfigTemplate.cfg
8
8
:caption: StatesAccountingAgent options
9
9
"""
10
10
from DIRAC import S_OK , S_ERROR
11
+ from DIRAC .ConfigurationSystem .Client .Helpers .Operations import Operations
11
12
from DIRAC .Core .Base .AgentModule import AgentModule
12
13
from DIRAC .Core .Utilities import Time
13
14
from DIRAC .AccountingSystem .Client .Types .WMSHistory import WMSHistory
14
15
from DIRAC .AccountingSystem .Client .DataStoreClient import DataStoreClient
15
16
from DIRAC .MonitoringSystem .Client .MonitoringReporter import MonitoringReporter
16
17
from DIRAC .WorkloadManagementSystem .DB .JobDB import JobDB
18
+ from DIRAC .WorkloadManagementSystem .DB .PilotAgentsDB import PilotAgentsDB
17
19
18
20
19
21
class StatesAccountingAgent (AgentModule ):
20
22
"""Agent that every 15 minutes will report
21
23
to the AccountingDB (MySQL) or the Monitoring DB (ElasticSearch), or both,
22
24
a snapshot of the JobDB.
25
+ Also sends a snapshot of PilotAgentsDB to Monitoring.
23
26
"""
24
27
28
+ # WMSHistory fields
25
29
__summaryKeyFieldsMapping = [
26
30
"Status" ,
27
31
"Site" ,
@@ -36,24 +40,29 @@ class StatesAccountingAgent(AgentModule):
36
40
__summaryValueFieldsMapping = ["Jobs" , "Reschedules" ]
37
41
__renameFieldsMapping = {"JobType" : "JobSplitType" }
38
42
43
+ # PilotsHistory fields
44
+ __pilotKeyFields = ["TaskQueueID" , "GridSite" , "GridType" , "Status" ]
45
+ __pilotValueFields = ["Pilots" ]
46
+
39
47
def initialize (self ):
40
48
"""Standard initialization"""
41
49
# This agent will always loop every 15 minutes
42
50
self .am_setOption ("PollingTime" , 900 )
43
51
44
52
self .backends = self .am_getOption ("Backends" , "Accounting" ).replace (" " , "" ).split ("," )
45
- messageQueue = self . am_getOption ( "MessageQueue " , "dirac.wmshistory " )
53
+ self . monitoringEnabled = Operations (). getValue ( "monitoringEnabled " , "False " )
46
54
47
- self .log . info ( "Committing to %s backend" % "and " . join ( self . backends ) )
55
+ messageQueue = self .am_getOption ( "MessageQueue" , "dirac.wmshistory" )
48
56
49
57
self .datastores = {} # For storing the clients to Accounting and Monitoring
50
58
51
59
if "Accounting" in self .backends :
52
60
self .datastores ["Accounting" ] = DataStoreClient (retryGraceTime = 900 )
53
- if "Monitoring" in self .backends :
61
+ if "Monitoring" in self .backends or self . monitoringEnabled :
54
62
self .datastores ["Monitoring" ] = MonitoringReporter (
55
63
monitoringType = "WMSHistory" , failoverQueueName = messageQueue
56
64
)
65
+ self .pilotReporter = MonitoringReporter (monitoringType = "PilotsHistory" )
57
66
58
67
self .__jobDBFields = []
59
68
for field in self .__summaryKeyFieldsMapping :
@@ -66,17 +75,50 @@ def initialize(self):
66
75
67
76
def execute (self ):
68
77
"""Main execution method"""
69
- # Get the WMS Snapshot!
78
+
79
+ # PilotsHistory to Monitoring
80
+ if self .monitoringEnabled :
81
+ result = PilotAgentsDB .getSummarySnapshot (self .__pilotKeyFields )
82
+ now = Time .dateTime ()
83
+ if not result ["OK" ]:
84
+ self .log .error (
85
+ "Can't get the PilotAgentsDB summary" , "%s: won't commit at this cycle" % result ["Message" ]
86
+ )
87
+ return S_ERROR ()
88
+
89
+ values = result ["Value" ][1 ]
90
+ for record in values :
91
+ record = record [1 :]
92
+ rD = {}
93
+ for iP in range (len (self .__pilotKeyFields )):
94
+ rD [self .__pilotKeyFields [iP ]] = record [iP ]
95
+ record = record [len (self .__pilotKeyFields ) :]
96
+ for iP in range (len (self .__pilotValueFields )):
97
+ rD [self .__pilotValueFields [iP ]] = int (record [iP ])
98
+ rD ["timestamp" ] = int (Time .toEpoch (now ))
99
+ self .log .verbose ("Adding following PilotsHistory record to Reporter: \n " , rD )
100
+ self .pilotReporter .addRecord (rD )
101
+
102
+ self .log .info ("Committing PilotsHistory to Monitoring" )
103
+ result = self .pilotReporter .commit ()
104
+ if not result ["OK" ]:
105
+ self .log .error ("Could not commit PilotsHistory to Monitoring" )
106
+ return S_ERROR ()
107
+ self .log .verbose ("Done committing PilotsHistory to Monitoring" )
108
+
109
+ # WMSHistory to Monitoring or Accounting
110
+ self .log .info ("Committing WMSHistory to %s backend" % "and " .join (self .backends ))
70
111
result = JobDB ().getSummarySnapshot (self .__jobDBFields )
71
112
now = Time .dateTime ()
72
113
if not result ["OK" ]:
73
- self .log .error ("Can't get the JobDB summary" , "%s: won't commit at this cycle" % result ["Message" ])
114
+ self .log .error (
115
+ "Can't get the JobDB summary" , "%s: won't commit WMSHistory at this cycle" % result ["Message" ]
116
+ )
74
117
return S_ERROR ()
75
118
76
- # Now we try to commit
77
119
values = result ["Value" ][1 ]
78
120
79
- self .log .info ("Start sending records" )
121
+ self .log .info ("Start sending WMSHistory records" )
80
122
for record in values :
81
123
record = record [1 :]
82
124
rD = {}
@@ -101,16 +143,16 @@ def execute(self):
101
143
acWMS .setValuesFromDict (rD )
102
144
retVal = acWMS .checkValues ()
103
145
if not retVal ["OK" ]:
104
- self .log .error ("Invalid accounting record " , "%s -> %s" % (retVal ["Message" ], rD ))
146
+ self .log .error ("Invalid WMSHistory accounting record " , "%s -> %s" % (retVal ["Message" ], rD ))
105
147
else :
106
148
self .datastores ["Accounting" ].addRegister (acWMS )
107
149
108
150
for backend , datastore in self .datastores .items ():
109
- self .log .info ("Committing to %s backend" % backend )
151
+ self .log .info ("Committing WMSHistory records to %s backend" % backend )
110
152
result = datastore .commit ()
111
153
if not result ["OK" ]:
112
- self .log .error ("Couldn't commit WMS history to %s" % backend , result ["Message" ])
154
+ self .log .error ("Couldn't commit WMSHistory to %s" % backend , result ["Message" ])
113
155
return S_ERROR ()
114
- self .log .verbose ("Done committing to %s backend" % backend )
156
+ self .log .verbose ("Done committing WMSHistory to %s backend" % backend )
115
157
116
158
return S_OK ()
0 commit comments