Skip to content

Commit 54a2c63

Browse files
committed
Finished working prototype of gantt chart generator
1 parent 13fddc8 commit 54a2c63

File tree

1 file changed

+195
-95
lines changed

1 file changed

+195
-95
lines changed

nipype/utils/draw_gantt_chart.py

Lines changed: 195 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,73 @@
1010
import datetime
1111
import random
1212
import pandas as pd
13-
import dateutil
1413
from collections import OrderedDict
1514

1615

17-
def log_to_events(logfile):
18-
events = []
19-
with open(logfile, 'r') as content:
20-
#read file separating each line
21-
content = content.read()
22-
lines = content.split('\n')
16+
def create_event_dict(start_time, nodes_list):
17+
'''
18+
Function to generate a dictionary of event (start/finish) nodes
19+
from the nodes list
2320
24-
for l in lines:
25-
event = None
26-
try:
27-
event = json.loads(l)
28-
except Exception, e:
29-
pass
21+
Parameters
22+
----------
23+
start_time : datetime.datetime
24+
a datetime object of the pipeline start time
25+
nodes_list : list
26+
a list of the node dictionaries that were run in the pipeline
3027
31-
if not event: continue
28+
Returns
29+
-------
30+
events : dictionary
31+
a dictionary keyed by the seconds elapsed from the pipeline start
32+
to each event; the value is the node dictionary for that event
33+
'''
3234

33-
if 'start' in event:
34-
event['type'] = 'start'
35-
event['time'] = event['start']
36-
else:
37-
event['type'] = 'finish'
38-
event['time'] = event['finish']
35+
# Import packages
36+
import copy
3937

40-
events.append(event)
38+
events = {}
39+
for node in nodes_list:
40+
# Format node fields
41+
try:
42+
estimated_threads = float(node['num_threads'])
43+
except:
44+
estimated_threads = 1
45+
try:
46+
estimated_memory_gb = float(node['estimated_memory_gb'])
47+
except:
48+
estimated_memory_gb = 1.0
49+
try:
50+
runtime_threads = float(node['runtime_threads'])
51+
except:
52+
runtime_threads = 0
53+
try:
54+
runtime_memory_gb = float(node['runtime_memory_gb'])
55+
except:
56+
runtime_memory_gb = 0.0
57+
58+
# Init and format event-based nodes
59+
node['estimated_threads'] = estimated_threads
60+
node['estimated_memory_gb'] = estimated_memory_gb
61+
node['runtime_threads'] = runtime_threads
62+
node['runtime_memory_gb'] = runtime_memory_gb
63+
start_node = node
64+
finish_node = copy.deepcopy(node)
65+
start_node['event'] = 'start'
66+
finish_node['event'] = 'finish'
67+
68+
# Get dictionary key
69+
start_delta = (node['start'] - start_time).total_seconds()
70+
finish_delta = (node['finish'] - start_time).total_seconds()
71+
72+
# Populate dictionary
73+
if events.has_key(start_delta) or events.has_key(finish_delta):
74+
err_msg = 'Event logged twice or events started at exact same time!'
75+
raise KeyError(err_msg)
76+
events[start_delta] = start_node
77+
events[finish_delta] = finish_node
78+
79+
# Return events dictionary
4180
return events
4281

4382

@@ -65,7 +104,6 @@ def log_to_dict(logfile):
65104
unifinished_nodes = [] #all start nodes that dont have a finish yet
66105

67106
with open(logfile, 'r') as content:
68-
69107
#read file separating each line
70108
content = content.read()
71109
lines = content.split('\n')
@@ -98,7 +136,8 @@ def log_to_dict(logfile):
98136
if aux['id'] == node['id'] and aux['name'] == node['name'] \
99137
and aux['start'] < node['finish']:
100138
node['start'] = aux['start']
101-
node['duration'] = (node['finish'] - node['start']).total_seconds()
139+
node['duration'] = \
140+
(node['finish'] - node['start']).total_seconds()
102141

103142
unifinished_nodes.remove(aux)
104143
nodes_list.append(node)
@@ -113,29 +152,54 @@ def log_to_dict(logfile):
113152
n['duration'] = (n['finish'] - n['start']).total_seconds()
114153
nodes_list.append(n)
115154

155+
# Return list of nodes
116156
return nodes_list
117157

118158

119-
def calculate_resources(events, resource):
159+
def calculate_resource_timeseries(events, resource):
160+
'''
161+
Given an event dictionary, calculate the resources used
162+
as a timeseries
163+
164+
Parameters
165+
----------
166+
events : dictionary
167+
a dictionary of event-based node dictionaries of the workflow
168+
execution statistics
169+
resource : string
170+
the resource of interest to return the time-series of;
171+
e.g. 'runtime_memory_gb', 'estimated_threads', etc
172+
173+
Returns
174+
-------
175+
time_series : pandas Series
176+
a pandas Series object that contains timestamps as the indices
177+
and the resource amount as values
178+
'''
179+
180+
# Init variables
120181
res = OrderedDict()
121-
for event in events:
122-
all_res = 0.0
123-
if event['type'] == "start":
182+
all_res = 0.0
183+
184+
# Iterate through the events
185+
for tdelta, event in sorted(events.items()):
186+
if event['event'] == "start":
124187
if resource in event and event[resource] != 'Unkown':
125188
all_res += float(event[resource])
126189
current_time = event['start'];
127-
elif event['type'] == "finish":
190+
elif event['event'] == "finish":
128191
if resource in event and event[resource] != 'Unkown':
129-
all_res += float(event[resource])
192+
all_res -= float(event[resource])
130193
current_time = event['finish'];
131194
res[current_time] = all_res
132195

133-
timestamps = [dateutil.parser.parse(ts) for ts in res.keys()]
134-
time_series = pd.Series(data=res.values(), index=timestamps)
135-
#TODO: pandas is removing all data values somewhere here
136-
#interp_seq = pd.date_range(time_series.index[0], time_series.index[-1], freq='U')
137-
#interp_time_series = time_series.reindex(interp_seq)
138-
#interp_time_series = interp_time_series.fillna(method='ffill')
196+
# Formulate the pandas timeseries
197+
time_series = pd.Series(data=res.values(), index=res.keys())
198+
# Downsample: keep only the points where the resource value changes
199+
ts_diff = time_series.diff()
200+
time_series = time_series[ts_diff!=0]
201+
202+
# Return the new time series
139203
return time_series
140204

141205

@@ -186,7 +250,8 @@ def draw_lines(start, total_duration, minute_scale, scale):
186250
return result
187251

188252

189-
def draw_nodes(start, nodes_list, cores, minute_scale, space_between_minutes, colors):
253+
def draw_nodes(start, nodes_list, cores, minute_scale, space_between_minutes,
254+
colors):
190255
'''
191256
Function to return the html-string of the node drawings for the
192257
gantt chart
@@ -269,8 +334,8 @@ def draw_nodes(start, nodes_list, cores, minute_scale, space_between_minutes, co
269334
'node_finish' : node_finish.strftime("%Y-%m-%d %H:%M:%S")}
270335
# Create new node string
271336
new_node = "<div class='node' style='left:%(left)spx;top:%(offset)spx;"\
272-
"height:%(scale_duration)spx;background-color:%(color)s;"\
273-
"'title='%(node_name)s\nduration:%(node_dur)s\n"\
337+
"height:%(scale_duration)spx;background-color:%(color)s;'"\
338+
"title='%(node_name)s\nduration:%(node_dur)s\n"\
274339
"start:%(node_start)s\nend:%(node_finish)s'></div>" % \
275340
node_dict
276341

@@ -280,49 +345,79 @@ def draw_nodes(start, nodes_list, cores, minute_scale, space_between_minutes, co
280345
# Return html string for nodes
281346
return result
282347

283-
284-
def draw_thread_bar(threads,space_between_minutes, minute_scale, color):
285-
result = "<p class='time' style='top:198px;left:900px;'>Threads</p>"
286-
287-
scale = float(space_between_minutes/float(minute_scale))
288-
space_between_minutes = float(space_between_minutes/60.0)
289-
290-
for i in range(len(threads)):
291-
#print threads[i]
292-
width = threads[i] * 10
293-
t = (float(i*scale*minute_scale)/60.0) + 220
294-
bar = "<div class='bar' style='height:"+ str(space_between_minutes) + "px;width:"+ str(width) +"px;left:900px;top:"+str(t)+"px'></div>"
295-
result += bar
296-
297-
return result
298-
299-
300-
def draw_memory_bar(nodes_list, space_between_minutes, minute_scale, color,
301-
mem_key='runtime_memory_gb'):
348+
# def draw_thread_bar(threads,space_between_minutes, minute_scale, color):
349+
# result = "<p class='time' style='top:198px;left:900px;'>Threads</p>"
350+
#
351+
# scale = float(space_between_minutes/float(minute_scale))
352+
# space_between_minutes = float(space_between_minutes/60.0)
353+
#
354+
# for i in range(len(threads)):
355+
# #print threads[i]
356+
# width = threads[i] * 10
357+
# t = (float(i*scale*minute_scale)/60.0) + 220
358+
# bar = "<div class='bar' style='height:"+ str(space_between_minutes) + "px;width:"+ str(width) +"px;left:900px;top:"+str(t)+"px'></div>"
359+
# result += bar
360+
#
361+
# return result
362+
363+
def draw_resource_bar(start_time, finish_time, time_series, space_between_minutes,
364+
minute_scale, color, left, resource):
302365
'''
303366
'''
304367

305-
# Init variables
306368
# Resource header
307-
result = "<p class='time' style='top:198px;left:1200px;'>Memory</p>"
308-
#
369+
result = "<p class='time' style='top:198px;left:%dpx;'>%s</p>" \
370+
% (left, resource)
371+
# Image scaling factors
309372
scale = float(space_between_minutes/float(minute_scale))
310373
space_between_minutes = float(space_between_minutes/scale)
311374

312-
for idx, node in enumerate(nodes_list):
313-
try:
314-
memory = float(node[mem_key])
315-
except:
316-
memory = 0
317-
318-
height = (node['duration'] / 60) * scale * space_between_minutes
319-
width = memory * 20
320-
t = (float(idx*scale*minute_scale)/60.0) + 220
321-
bar = "<div class='bar' style='background-color:"+color+";height:"+ \
322-
str(height) + "px;width:"+ str(width) +\
323-
"px;left:1200px;top:"+str(t)+"px'></div>"
324-
result += bar
325-
375+
# Iterate through time series
376+
ts_len = len(time_series)
377+
for idx, (ts_start, amount) in enumerate(time_series.iteritems()):
378+
if idx < ts_len-1:
379+
ts_end = time_series.index[idx+1]
380+
else:
381+
ts_end = finish_time
382+
# Calculate offset from start at top
383+
offset = ((ts_start-start_time).total_seconds() / 60.0) * scale * \
384+
space_between_minutes + 220
385+
# Scale duration
386+
duration_mins = (ts_end-ts_start).total_seconds() / 60.0
387+
height = duration_mins * scale * \
388+
space_between_minutes
389+
if height < 5:
390+
height = 5
391+
height -= 2
392+
393+
# Bar width is proportional to resource amount
394+
width = amount * 20
395+
396+
if resource.lower() == 'memory':
397+
label = '%.3f GB' % amount
398+
else:
399+
label = '%d threads' % amount
400+
401+
# Setup dictionary for bar html string insertion
402+
bar_dict = {'color' : color,
403+
'height' : height,
404+
'width' : width,
405+
'offset': offset,
406+
'left' : left,
407+
'label' : label,
408+
'duration' : duration_mins,
409+
'start' : ts_start.strftime('%Y-%m-%d %H:%M:%S'),
410+
'finish' : ts_end.strftime('%Y-%m-%d %H:%M:%S')}
411+
412+
bar_html = "<div class='bar' style='background-color:%(color)s;"\
413+
"height:%(height).3fpx;width:%(width).3fpx;"\
414+
"left:%(left)dpx; top:%(offset).3fpx;'"\
415+
"title='%(label)s\nduration:%(duration).3f\n"\
416+
"start:%(start)s\nend:%(finish)s'></div>"
417+
# Add another bar to html line
418+
result += bar_html % bar_dict
419+
420+
# Return bar-formatted html string
326421
return result
327422

328423

@@ -379,9 +474,6 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
379474
# generate_gantt_chart('callback.log', 8)
380475
'''
381476

382-
nodes_list = log_to_dict(logfile)
383-
scale = space_between_minutes
384-
385477
#add the html header
386478
html_string = '''<!DOCTYPE html>
387479
<head>
@@ -432,46 +524,54 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
432524
<body>
433525
<div id="content">'''
434526

527+
# Read in json-log to get list of node dicts
528+
nodes_list = log_to_dict(logfile)
435529

436-
#create the header of the report with useful information
530+
# Create the header of the report with useful information
437531
start_node = nodes_list[0]
438532
last_node = nodes_list[-1]
439533
duration = (last_node['finish'] - start_node['start']).total_seconds()
440534

441-
#summary strings of workflow at top
535+
# Get events based dictionary of node run stats
536+
events = create_event_dict(start_node['start'], nodes_list)
537+
538+
# Summary strings of workflow at top
442539
html_string += '<p>Start: ' + start_node['start'].strftime("%Y-%m-%d %H:%M:%S") + '</p>'
443540
html_string += '<p>Finish: ' + last_node['finish'].strftime("%Y-%m-%d %H:%M:%S") + '</p>'
444541
html_string += '<p>Duration: ' + "{0:.2f}".format(duration/60) + ' minutes</p>'
445542
html_string += '<p>Nodes: ' + str(len(nodes_list))+'</p>'
446543
html_string += '<p>Cores: ' + str(cores) + '</p>'
447544

545+
# Draw nipype nodes Gantt chart and runtimes
448546
html_string += draw_lines(start_node['start'], duration, minute_scale,
449547
space_between_minutes)
450548
html_string += draw_nodes(start_node['start'], nodes_list, cores, minute_scale,
451549
space_between_minutes, colors)
452550

453-
result = log_to_events(logfile)
454-
455-
#threads_estimated = calculate_resources(result, 'num_threads')
456-
#html_string += draw_thread_bar(threads_estimated, space_between_minutes, minute_scale, '#90BBD7')
457-
458-
#threads_real = calculate_resources(result, 'runtime_threads')
459-
#html_string += draw_thread_bar(threads_real, space_between_minutes, minute_scale, '#03969D')
460-
461-
462-
#memory_estimated = calculate_resources(result, 'estimated_memory_gb')
463-
#html_string += draw_memory_bar(memory_estimated, space_between_minutes, minute_scale, '#90BBD7')
464-
465-
memory_real = calculate_resources(result, 'runtime_memory_gb')
466-
html_string += draw_memory_bar(nodes_list, space_between_minutes, minute_scale, '#03969D')
467-
551+
# Get memory timeseries
552+
estimated_mem_ts = calculate_resource_timeseries(events, 'estimated_memory_gb')
553+
runtime_mem_ts = calculate_resource_timeseries(events, 'runtime_memory_gb')
554+
# Plot gantt chart
555+
html_string += draw_resource_bar(start_node['start'], last_node['finish'], estimated_mem_ts,
556+
space_between_minutes, minute_scale, '#90BBD7', 1200, 'Memory')
557+
html_string += draw_resource_bar(start_node['start'], last_node['finish'], runtime_mem_ts,
558+
space_between_minutes, minute_scale, '#03969D', 1200, 'Memory')
559+
560+
# Get threads timeseries
561+
estimated_threads_ts = calculate_resource_timeseries(events, 'estimated_threads')
562+
runtime_threads_ts = calculate_resource_timeseries(events, 'runtime_threads')
563+
# Plot gantt chart
564+
html_string += draw_resource_bar(start_node['start'], last_node['finish'], estimated_threads_ts,
565+
space_between_minutes, minute_scale, '#90BBD7', 600, 'Threads')
566+
html_string += draw_resource_bar(start_node['start'], last_node['finish'], runtime_threads_ts,
567+
space_between_minutes, minute_scale, '#03969D', 600, 'Threads')
468568

469569
#finish html
470570
html_string+= '''
471571
</div>
472572
</body>'''
473573

474574
#save file
475-
html_file = open(logfile +'.html', 'wb')
575+
html_file = open(logfile + '.html', 'wb')
476576
html_file.write(html_string)
477577
html_file.close()

0 commit comments

Comments
 (0)