Skip to content

Commit 0e8cadb

Browse files
committed
Improve and fix duration calculation and server status
This change introduces several changes and fixes, namely: - We now use properties for the duration calculation. When creating a record, the durations are calculated using properties, therefore no calculations are needed outside the record. - The duration reported for a server was negative in some cases. This was due to a couple of things. First of all, when some actions are performed on a server (resize, rebuild, evacuate), the server's start time is reset to the action's timestamp. Secondly, the way the nova API handles the changes-since and servers.list() calls and filters was not returning some servers; we now handle this as well. - When we were requesting old records (i.e. a republish) the server's status was set to "complete"; however, this was wrong, as it has to be "started". This change requires a complete republishing of all records. Fixes #62 Fixes #61
1 parent 6df46e9 commit 0e8cadb

File tree

2 files changed

+150
-61
lines changed

2 files changed

+150
-61
lines changed

caso/extract/nova.py

Lines changed: 120 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ def _get_glance_client(self, project):
4949
return glanceclient.client.Client(2, session=session)
5050

5151
def build_record(self, server, vo, images, flavors, users):
52+
server_start = self._get_server_start(server)
53+
server_end = self._get_server_end(server)
54+
5255
status = self.vm_status(server.status)
56+
5357
image_id = None
5458
if server.image:
5559
image = images.get(server.image['id'])
@@ -62,8 +66,12 @@ def build_record(self, server, vo, images, flavors, users):
6266
if flavor:
6367
bench_name = flavor["extra"].get(CONF.benchmark_name_key)
6468
bench_value = flavor["extra"].get(CONF.benchmark_value_key)
69+
memory = flavor["ram"]
70+
cpu_count = flavor["vcpus"]
71+
disk = flavor["disk"] + flavor["OS-FLV-EXT-DATA:ephemeral"]
6572
else:
6673
bench_name = bench_value = None
74+
memory = cpu_count = disk = None
6775

6876
if not all([bench_name, bench_value]):
6977
if any([bench_name, bench_value]):
@@ -76,24 +84,40 @@ def build_record(self, server, vo, images, flavors, users):
7684
"file or set the correct properties in the "
7785
"flavor." % flavor)
7886

79-
server_start = dateutil.parser.parse(server.created)
80-
server_start = server_start.replace(tzinfo=None)
81-
8287
r = record.CloudRecord(server.id,
8388
CONF.site_name,
8489
server.name,
8590
server.user_id,
8691
server.tenant_id,
8792
vo,
8893
start_time=server_start,
94+
end_time=server_end,
8995
compute_service=CONF.service_name,
9096
status=status,
9197
image_id=image_id,
9298
user_dn=users.get(server.user_id, None),
9399
benchmark_type=bench_name,
94-
benchmark_value=bench_value)
100+
benchmark_value=bench_value,
101+
memory=memory,
102+
cpu_count=cpu_count,
103+
disk=disk)
95104
return r
96105

106+
@staticmethod
107+
def _get_server_start(server):
108+
# We use created, as the start_time may change upon certain actions!
109+
server_start = dateutil.parser.parse(server.created)
110+
server_start = server_start.replace(tzinfo=None)
111+
return server_start
112+
113+
@staticmethod
114+
def _get_server_end(server):
115+
server_end = server.__getattr__('OS-SRV-USG:terminated_at')
116+
if server_end:
117+
server_end = dateutil.parser.parse(server_end)
118+
server_end = server_end.replace(tzinfo=None)
119+
return server_end
120+
97121
def extract_for_project(self, project, extract_from, extract_to):
98122
"""Extract records for a project from given date querying nova.
99123
@@ -128,18 +152,30 @@ def extract_for_project(self, project, extract_from, extract_to):
128152

129153
vo = self.voms_map.get(project)
130154

131-
# We cannot use 'changes-since' in the servers.list() API query, as it
132-
# will only include changes that have changed its status after that
133-
# date. However we cannot just get all the usages and then query
134-
# server by server, as deleted servers are not returned by this
135-
# servers.get() call. What we do is the following:
155+
# We cannot use just 'changes-since' in the servers.list() API query,
156+
# as it will only include servers that have changed their status after
157+
# that date. However we cannot just get all the usages and then query
158+
# server by server, as deleted servers are not returned by the usages
159+
# call. Moreover, Nova resets the start_time after performing some
160+
# actions on the server (rebuild, resize, rescue). If we use that time,
161+
# we may get a drop in the wall time, as a server that has been resized
162+
# in the middle of its lifetime will suddenly change its start_time
136163
#
137-
# 1.- List all the deleted servers that changed after the start date
138-
# 2.- Build the records for the period [start, end]
139-
# 3.- Get all the usages
164+
# Therefore, what we do is the following (hackish approach)
165+
#
166+
# 1.- List all the servers that changed their status after the start time
167+
# for the reporting period
168+
# 2.- Build the records for the period [start, end] using those servers
169+
# 3.- Get all the usages, being aware that the start time may be wrong
140170
# 4.- Iter over the usages and:
141-
# 4.1.- get information for non deleted servers
142-
# 4.2.- do nothing with deleted servers, as we collected in in step (2)
171+
# 4.1.- get information for servers that are not returned by the query
172+
# in (1), for instance servers that have not changed their status.
173+
# We then build the records for those servers
174+
# 4.2.- For all the servers, adjust the CPU, memory and disk resources
175+
# as the flavor may not exist, but we can get those resources
176+
# from the usages API.
177+
178+
# Let's start
143179

144180
# 1.- List all the servers that changed after the start of that period.
145181
servers = []
@@ -148,8 +184,7 @@ def extract_for_project(self, project, extract_from, extract_to):
148184
# Use a marker and iter over results until we do not have more to get
149185
while True:
150186
aux = nova.servers.list(
151-
search_opts={"changes-since": extract_from,
152-
"deleted": True},
187+
search_opts={"changes-since": extract_from},
153188
limit=limit,
154189
marker=marker
155190
)
@@ -165,20 +200,40 @@ def extract_for_project(self, project, extract_from, extract_to):
165200
# (we do this manually as we cannot limit the query to a period, only
166201
# changes after start date).
167202
for server in servers:
168-
server_start = dateutil.parser.parse(server.created)
169-
server_start = server_start.replace(tzinfo=None)
203+
204+
server_start = self._get_server_start(server)
205+
server_end = self._get_server_end(server)
206+
170207
# Some servers may be deleted before 'extract_from' but updated
171208
# afterwards
172-
server_end = server.__getattr__('OS-SRV-USG:terminated_at')
173-
if server_end:
174-
server_end = dateutil.parser.parse(server_end)
175-
server_end = server_end.replace(tzinfo=None)
176209
if (server_start > extract_to or
177210
(server_end and server_end < extract_from)):
178211
continue
212+
179213
records[server.id] = self.build_record(server, vo, images,
180214
flavors, users)
181215

216+
# Wall and CPU durations are absolute values, not deltas for the
217+
# reporting period. The nova API only gives us the usages for the
218+
# requested period, therefore we need to calculate the wall
219+
# duration by ourselves, then multiply by the nr of CPUs to get the
220+
# CPU duration.
221+
222+
# If the machine has not ended, report consumption until
223+
# extract_to, otherwise get its consumption by subtracting ended -
224+
# started (done by the record).
225+
if server_end is None or server_end > extract_to:
226+
wall = extract_to - server_start
227+
wall = int(wall.total_seconds())
228+
records[server.id].wall_duration = wall
229+
# If we are republishing, the machine reports status completed,
230+
# but it is not True for this period, so we need to fake the
231+
# status and remove the end time for the server
232+
records[server.id].end_time = None
233+
234+
if records[server.id].status == "completed":
235+
records[server.id].status = self.vm_status("active")
236+
182237
# 3.- Get all the usages for the period
183238
start = extract_from
184239
aux = nova.usage.get(project_id, start, extract_to)
@@ -190,46 +245,50 @@ def extract_for_project(self, project, extract_from, extract_to):
190245
if usage["instance_id"] not in records:
191246
server = nova.servers.get(usage["instance_id"])
192247

193-
server_start = dateutil.parser.parse(server.created)
194-
server_start = server_start.replace(tzinfo=None)
248+
server_start = self._get_server_start(server)
195249
if server_start > extract_to:
196250
continue
197-
records[server.id] = self.build_record(server, vo, images,
198-
flavors, users)
199-
instance_id = usage["instance_id"]
200-
records[instance_id].memory = usage["memory_mb"]
201-
records[instance_id].cpu_count = usage["vcpus"]
202-
records[instance_id].disk = usage["local_gb"]
203-
204-
# Start time must be the time when the machine was created
205-
started = records[instance_id].start_time
206-
207-
# End time must ben the time when the machine was ended, but it may
208-
# be none
209-
if usage.get('ended_at', None) is not None:
210-
ended = dateutil.parser.parse(usage["ended_at"])
211-
records[instance_id].end_time = ended
212-
else:
213-
ended = None
214-
215-
# Wall and CPU durations are absolute values, not deltas for the
216-
# reporting period. The nova API only gives use the usages for the
217-
# requested period, therefore we need to calculate the wall
218-
# duration by ourselves, then multiply by the nr of CPUs to get the
219-
# CPU duration.
220-
221-
# If the machine has not ended, report consumption until
222-
# extract_to, otherwise get its consuption by substracting ended -
223-
# started.
224-
if ended is not None and ended < extract_to:
225-
wall = ended - started
226-
else:
227-
wall = extract_to - started
228-
229-
wall = int(wall.total_seconds())
230-
records[instance_id].wall_duration = wall
231-
232-
cput = wall * usage["vcpus"]
233-
records[instance_id].cpu_duration = cput
251+
record = self.build_record(server, vo, images,
252+
flavors, users)
253+
254+
server_start = record.start_time
255+
256+
# End time must be the time when the machine was ended, but it
257+
# may be none
258+
if usage.get('ended_at', None) is not None:
259+
server_end = dateutil.parser.parse(usage["ended_at"])
260+
record.end_time = server_end
261+
else:
262+
server_end = None
263+
264+
# Wall and CPU durations are absolute values, not deltas for
265+
# the reporting period. The nova API only gives us the usages
266+
# for the requested period, therefore we need to calculate the
267+
# wall duration by ourselves, then multiply by the nr of CPUs
268+
# to get the CPU duration.
269+
270+
# If the machine has not ended, report consumption until
271+
# extract_to, otherwise get its consumption by subtracting
272+
# ended - started (done by the record).
273+
if server_end is None or server_end > extract_to:
274+
wall = extract_to - server_start
275+
wall = int(wall.total_seconds())
276+
record.wall_duration = wall
277+
# If we are republishing, the machine reports status
278+
# completed, but it is not True for this period, so we need
279+
# to fake the status
280+
if records[server.id].status == "completed":
281+
records[server.id].status = self.vm_status("active")
282+
283+
cput = wall * usage["vcpus"]
284+
record.cpu_duration = cput
285+
286+
records[server.id] = record
287+
288+
# Adjust resources that may not be available from the flavor, using the usage data
289+
record = records[usage["instance_id"]]
290+
record.memory = usage["memory_mb"]
291+
record.cpu_count = usage["vcpus"]
292+
record.disk = usage["local_gb"]
234293

235294
return records

caso/record.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,36 @@ def as_dict(self, version=None):
130130
return {k: v for k, v in self.map.items()
131131
if k in self._version_field_map[version]}
132132

133+
@property
134+
def wall_duration(self):
135+
duration = None
136+
if self._wall_duration is not None:
137+
duration = self._wall_duration
138+
elif None not in (self._start_time, self._end_time):
139+
duration = (self.end_time - self.start_time).total_seconds()
140+
return int(duration) if duration else duration
141+
142+
@wall_duration.setter
143+
def wall_duration(self, value):
144+
if value and not isinstance(value, (int, float)):
145+
raise ValueError("Duration must be a number")
146+
self._wall_duration = value
147+
148+
@property
149+
def cpu_duration(self):
150+
duration = None
151+
if self._cpu_duration is not None:
152+
duration = self._cpu_duration
153+
elif self.wall_duration is not None and self.cpu_count:
154+
duration = self.wall_duration * self.cpu_count
155+
return int(duration) if duration else duration
156+
157+
@cpu_duration.setter
158+
def cpu_duration(self, value):
159+
if value and not isinstance(value, (int, float)):
160+
raise ValueError("Duration must be a number")
161+
self._cpu_duration = value
162+
133163
@property
134164
def start_time(self):
135165
return self._start_time

0 commit comments

Comments
 (0)