@@ -49,7 +49,11 @@ def _get_glance_client(self, project):
         return glanceclient.client.Client(2, session=session)

     def build_record(self, server, vo, images, flavors, users):
+        server_start = self._get_server_start(server)
+        server_end = self._get_server_end(server)
+
         status = self.vm_status(server.status)
+
         image_id = None
         if server.image:
             image = images.get(server.image['id'])
@@ -62,8 +66,12 @@ def build_record(self, server, vo, images, flavors, users):
         if flavor:
             bench_name = flavor["extra"].get(CONF.benchmark_name_key)
             bench_value = flavor["extra"].get(CONF.benchmark_value_key)
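+            # Flavor fields: "ram" is reported in MB and the disk sizes in
+            # GB; the disk accounted below is the root disk plus the
+            # ephemeral disk.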
+            memory = flavor["ram"]
+            cpu_count = flavor["vcpus"]
+            disk = flavor["disk"] + flavor["OS-FLV-EXT-DATA:ephemeral"]
         else:
             bench_name = bench_value = None
+            memory = cpu_count = disk = None

         if not all([bench_name, bench_value]):
             if any([bench_name, bench_value]):
@@ -76,24 +84,40 @@ def build_record(self, server, vo, images, flavors, users):
7684 "file or set the correct properties in the "
7785 "flavor." % flavor )
7886
-        server_start = dateutil.parser.parse(server.created)
-        server_start = server_start.replace(tzinfo=None)
-
         r = record.CloudRecord(server.id,
                                CONF.site_name,
                                server.name,
                                server.user_id,
                                server.tenant_id,
                                vo,
                                start_time=server_start,
+                               end_time=server_end,
                                compute_service=CONF.service_name,
                                status=status,
                                image_id=image_id,
                                user_dn=users.get(server.user_id, None),
                                benchmark_type=bench_name,
-                               benchmark_value=bench_value)
+                               benchmark_value=bench_value,
+                               memory=memory,
+                               cpu_count=cpu_count,
+                               disk=disk)
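+        # memory, cpu_count and disk may still be None at this point (the
+        # flavor may no longer exist); extract_for_project later overwrites
+        # them with the values reported by the usages API.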
         return r

+    @staticmethod
+    def _get_server_start(server):
+        # We use created, as the start_time may change upon certain actions!
+        server_start = dateutil.parser.parse(server.created)
+        server_start = server_start.replace(tzinfo=None)
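+        # Dropping tzinfo keeps the value naive, so it can be compared with
+        # the naive extract_from/extract_to boundaries in extract_for_project.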
+        return server_start
+
+    @staticmethod
+    def _get_server_end(server):
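+        # The attribute name contains '-' and ':', so it cannot be read with
+        # dotted attribute access; it is unset for servers that have not been
+        # deleted yet.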
+        server_end = server.__getattr__('OS-SRV-USG:terminated_at')
+        if server_end:
+            server_end = dateutil.parser.parse(server_end)
+            server_end = server_end.replace(tzinfo=None)
+        return server_end
+
     def extract_for_project(self, project, extract_from, extract_to):
         """Extract records for a project from given date querying nova.

@@ -128,18 +152,30 @@ def extract_for_project(self, project, extract_from, extract_to):

         vo = self.voms_map.get(project)

-        # We cannot use 'changes-since' in the servers.list() API query, as it
-        # will only include changes that have changed its status after that
-        # date. However we cannot just get all the usages and then query
-        # server by server, as deleted servers are not returned by this
-        # servers.get() call. What we do is the following:
+        # We cannot use just 'changes-since' in the servers.list() API query,
+        # as it will only include servers that have changed their status
+        # after that date. However, we cannot just get all the usages and
+        # then query server by server, as deleted servers are not returned
+        # by the usages call. Moreover, Nova resets the start_time after
+        # performing some actions (rebuild, resize, rescue). If we used that
+        # time, we could see a drop in the wall time, as a server resized in
+        # the middle of its lifetime would suddenly change its start_time.
         #
-        # 1.- List all the deleted servers that changed after the start date
-        # 2.- Build the records for the period [start, end]
-        # 3.- Get all the usages
+        # Therefore, what we do is the following (hackish approach):
+        #
+        # 1.- List all the servers that changed their status after the start
+        #     time of the reporting period
+        # 2.- Build the records for the period [start, end] using those servers
+        # 3.- Get all the usages, being aware that the start time may be wrong
         # 4.- Iter over the usages and:
-        # 4.1.- get information for non deleted servers
-        # 4.2.- do nothing with deleted servers, as we collected in in step (2)
+        # 4.1.- get information for servers that are not returned by the
+        #       query in (1), for instance servers that have not changed
+        #       their status. We then build the records for those servers.
+        # 4.2.- for all the servers, adjust the CPU, memory and disk
+        #       resources, as the flavor may not exist, but we can get those
+        #       resources from the usages API.
+
+        # Let's start

         # 1.- List all the deleted servers from that period.
         servers = []
@@ -148,8 +184,7 @@ def extract_for_project(self, project, extract_from, extract_to):
         # Use a marker and iter over results until we do not have more to get
         while True:
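+            # Note: the 'deleted': True filter was dropped; with
+            # 'changes-since', nova also returns deleted servers, and the new
+            # strategy needs every server whose status changed in the period
+            # (see the comment block above).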
             aux = nova.servers.list(
-                search_opts={"changes-since": extract_from,
-                             "deleted": True},
+                search_opts={"changes-since": extract_from},
                 limit=limit,
                 marker=marker
             )
@@ -165,20 +200,40 @@ def extract_for_project(self, project, extract_from, extract_to):
         # (we do this manually as we cannot limit the query to a period, only
         # changes after start date).
         for server in servers:
-            server_start = dateutil.parser.parse(server.created)
-            server_start = server_start.replace(tzinfo=None)
+
+            server_start = self._get_server_start(server)
+            server_end = self._get_server_end(server)
+
             # Some servers may be deleted before 'extract_from' but updated
             # afterwards
-            server_end = server.__getattr__('OS-SRV-USG:terminated_at')
-            if server_end:
-                server_end = dateutil.parser.parse(server_end)
-                server_end = server_end.replace(tzinfo=None)
             if (server_start > extract_to or
                     (server_end and server_end < extract_from)):
                 continue
+
             records[server.id] = self.build_record(server, vo, images,
                                                    flavors, users)

+            # Wall and CPU durations are absolute values, not deltas for the
+            # reporting period. The nova API only gives us the usages for the
+            # requested period, therefore we need to calculate the wall
+            # duration ourselves, then multiply by the number of CPUs to get
+            # the CPU duration.
+
+            # If the machine has not ended, report consumption until
+            # extract_to; otherwise its consumption is ended - started,
+            # computed by the record itself.
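+            # Hypothetical example: with extract_to = 2015-02-01 and a server
+            # created on 2015-01-10 that is still running, the wall time is
+            # 22 days, i.e. int(timedelta(days=22).total_seconds()) == 1900800.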
+            if server_end is None or server_end > extract_to:
+                wall = extract_to - server_start
+                wall = int(wall.total_seconds())
+                records[server.id].wall_duration = wall
+                # If we are republishing, the machine reports status
+                # completed, but that is not true for this period, so we need
+                # to fake the status and remove the end time for the server.
+                records[server.id].end_time = None
+
+                if records[server.id].status == "completed":
+                    records[server.id].status = self.vm_status("active")
+
         # 3.- Get all the usages for the period
         start = extract_from
         aux = nova.usage.get(project_id, start, extract_to)
@@ -190,46 +245,50 @@ def extract_for_project(self, project, extract_from, extract_to):
             if usage["instance_id"] not in records:
                 server = nova.servers.get(usage["instance_id"])

-                server_start = dateutil.parser.parse(server.created)
-                server_start = server_start.replace(tzinfo=None)
+                server_start = self._get_server_start(server)
                 if server_start > extract_to:
                     continue
-                records[server.id] = self.build_record(server, vo, images,
-                                                       flavors, users)
-            instance_id = usage["instance_id"]
-            records[instance_id].memory = usage["memory_mb"]
-            records[instance_id].cpu_count = usage["vcpus"]
-            records[instance_id].disk = usage["local_gb"]
-
-            # Start time must be the time when the machine was created
-            started = records[instance_id].start_time
-
-            # End time must ben the time when the machine was ended, but it may
-            # be none
-            if usage.get('ended_at', None) is not None:
-                ended = dateutil.parser.parse(usage["ended_at"])
-                records[instance_id].end_time = ended
-            else:
-                ended = None
-
-            # Wall and CPU durations are absolute values, not deltas for the
-            # reporting period. The nova API only gives use the usages for the
-            # requested period, therefore we need to calculate the wall
-            # duration by ourselves, then multiply by the nr of CPUs to get the
-            # CPU duration.
-
-            # If the machine has not ended, report consumption until
-            # extract_to, otherwise get its consuption by substracting ended -
-            # started.
-            if ended is not None and ended < extract_to:
-                wall = ended - started
-            else:
-                wall = extract_to - started
-
-            wall = int(wall.total_seconds())
-            records[instance_id].wall_duration = wall
-
-            cput = wall * usage["vcpus"]
-            records[instance_id].cpu_duration = cput
+                record = self.build_record(server, vo, images,
+                                           flavors, users)
+
+                server_start = record.start_time
+
+                # End time must be the time when the machine was ended, but
+                # it may be None
+                if usage.get('ended_at', None) is not None:
+                    server_end = dateutil.parser.parse(usage["ended_at"])
+                    record.end_time = server_end
+                else:
+                    server_end = None
+
+                # Wall and CPU durations are absolute values, not deltas for
+                # the reporting period. The nova API only gives us the usages
+                # for the requested period, therefore we need to calculate
+                # the wall duration ourselves, then multiply by the number of
+                # CPUs to get the CPU duration.
+
+                # If the machine has not ended, report consumption until
+                # extract_to; otherwise its consumption is ended - started,
+                # computed by the record itself.
+                if server_end is None or server_end > extract_to:
+                    wall = extract_to - server_start
+                    wall = int(wall.total_seconds())
+                    record.wall_duration = wall
+                    # If we are republishing, the machine reports status
+                    # completed, but that is not true for this period, so we
+                    # need to fake the status
+                    if record.status == "completed":
+                        record.status = self.vm_status("active")
+
+                    cput = wall * usage["vcpus"]
+                    record.cpu_duration = cput
+
+                records[server.id] = record
+
+            # Adjust the resources that may not be set in the record (the
+            # flavor may no longer exist); the usages API always reports them.
+            record = records[usage["instance_id"]]
+            record.memory = usage["memory_mb"]
+            record.cpu_count = usage["vcpus"]
+            record.disk = usage["local_gb"]

         return records