Skip to content

Commit e1d19cb

Browse files
committed
improve thread draw algorithm
1 parent ace7368 commit e1d19cb

File tree

1 file changed

+117
-105
lines changed

1 file changed

+117
-105
lines changed

nipype/utils/draw_gantt_chart.py

Lines changed: 117 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -4,50 +4,118 @@
44
callback_log.log_nodes_cb()
55
"""
66

7+
# Import packages
78
# Import packages
89
import json
910
from dateutil import parser
1011
import datetime
1112
import random
13+
import pandas as pd
14+
import dateutil
15+
from collections import OrderedDict
1216

1317

14-
def log_to_json(logfile):
15-
result = []
18+
def log_to_events(logfile):
19+
events = []
1620
with open(logfile, 'r') as content:
17-
1821
#read file separating each line
1922
content = content.read()
2023
lines = content.split('\n')
21-
l = []
22-
for i in lines:
24+
25+
for l in lines:
26+
event = None
2327
try:
24-
y = json.loads(i)
25-
l.append(y)
26-
except Exception:
28+
event = json.loads(l)
29+
except Exception, e:
2730
pass
2831

29-
lines = l
32+
if not event: continue
33+
34+
if 'start' in event:
35+
event['type'] = 'start'
36+
event['time'] = event['start']
37+
else:
38+
event['type'] = 'finish'
39+
event['time'] = event['finish']
40+
41+
events.append(event)
42+
return events
43+
44+
def log_to_dict(logfile):
45+
46+
#keep track of important vars
47+
nodes = [] #all the parsed nodes
48+
unifinished_nodes = [] #all start nodes that dont have a finish yet
49+
50+
with open(logfile, 'r') as content:
51+
52+
#read file separating each line
53+
content = content.read()
54+
lines = content.split('\n')
3055

31-
last_node = [ x for x in lines if x.has_key('finish')][-1]
56+
for l in lines:
57+
#try to parse each line and transform in a json dict.
58+
#if the line has a bad format, just skip
59+
node = None
60+
try:
61+
node = json.loads(l)
62+
except Exception, e:
63+
pass
3264

33-
for i, line in enumerate(lines):
34-
#get first start it finds
35-
if not line.has_key('start'):
65+
if not node:
3666
continue
3767

38-
#fint the end node for that start
39-
for j in range(i+1, len(lines)):
40-
if lines[j].has_key('finish'):
41-
if lines[j]['id'] == line['id'] and \
42-
lines[j]['name'] == line['name']:
43-
line['finish'] = lines[j]['finish']
44-
line['duration'] = (parser.parse(line['finish']) - \
45-
parser.parse(line['start'])).total_seconds()
46-
result.append(line)
68+
#if it is a start node, add to unifinished nodes
69+
if 'start' in node:
70+
node['start'] = parser.parse(node['start'])
71+
unifinished_nodes.append(node)
72+
73+
#if it is end node, look in uninished nodes for matching start
74+
#remove from unifinished list and add to node list
75+
elif 'finish' in node:
76+
node['finish'] = parser.parse(node['finish'])
77+
#because most nodes are small, we look backwards in the unfinished list
78+
for s in range(len(unifinished_nodes)):
79+
aux = unifinished_nodes[s]
80+
#found the end for node start, copy over info
81+
if aux['id'] == node['id'] and aux['name'] == node['name'] and aux['start'] < node['finish']:
82+
node['start'] = aux['start']
83+
node['duration'] = (node['finish'] - node['start']).total_seconds()
84+
85+
unifinished_nodes.remove(aux)
86+
nodes.append(node)
4787
break
4888

49-
return result, last_node
50-
89+
#finished parsing
90+
#assume nodes without finish didn't finish running.
91+
#set their finish to last node run
92+
last_node = nodes[-1]
93+
for n in unifinished_nodes:
94+
n['finish'] = last_node['finish']
95+
n['duration'] = (n['finish'] - n['start']).total_seconds()
96+
nodes.append(n)
97+
98+
return nodes, last_node
99+
100+
def calculate_resources(events, resource):
101+
res = OrderedDict()
102+
for event in events:
103+
all_res = 0
104+
if event['type'] == "start":
105+
all_res =+ int(float(event[resource]))
106+
current_time = event['start'];
107+
elif event['type'] == "finish":
108+
all_res+ int(float(event[resource]))
109+
current_time = event['finish'];
110+
111+
res[current_time] = all_res
112+
113+
timestamps = [dateutil.parser.parse(ts) for ts in res.keys()]
114+
time_series = pd.Series(res.values(), timestamps)
115+
interp_seq = pd.date_range(time_series.index[0], time_series.index[-1], freq='S')
116+
interp_time_series = time_series.reindex(interp_seq)
117+
interp_time_series = interp_time_series.fillna(method='ffill')
118+
return interp_time_series
51119

52120
#total duration in seconds
53121
def draw_lines(start, total_duration, minute_scale, scale):
@@ -73,8 +141,8 @@ def draw_nodes(start, nodes, cores, scale, colors):
73141
end_times = [datetime.datetime(start.year, start.month, start.day, start.hour, start.minute, start.second) for x in range(cores)]
74142

75143
for node in nodes:
76-
node_start = parser.parse(node['start'])
77-
node_finish = parser.parse(node['finish'])
144+
node_start = node['start']
145+
node_finish = node['finish']
78146
offset = ((node_start - start).total_seconds() / 60) * scale + 220
79147
scale_duration = (node['duration'] / 60) * scale
80148
if scale_duration < 5:
@@ -93,88 +161,34 @@ def draw_nodes(start, nodes, cores, scale, colors):
93161
node_finish.second)
94162
#end_times[j]+= datetime.timedelta(microseconds=node_finish.microsecond)
95163
break
96-
97-
color = random.choice(colors)
98-
new_node = "<div class='node' style=' left:" + str(left) + \
99-
"px;top: " + str(offset) + "px;height:" + \
100-
str(scale_duration) + "px; background-color: " + color + \
101-
" 'title='" + node['name'] +'\nduration: ' + \
102-
str(node['duration']/60) + '\nstart: ' + node['start'] + \
103-
'\nend: ' + node['finish'] + "'></div>";
164+
color = random.choice(colors)
165+
new_node = "<div class='node' style=' left:" + str(left) + "px;top: " + str(offset) + "px;height:" + str(scale_duration) + "px; background-color: " + color + " 'title='" + node['name'] +'\nduration: ' + str(node['duration']/60) + '\nstart: ' + node['start'].strftime("%Y-%m-%d %H:%M:%S") + '\nend: ' + node['finish'].strftime("%Y-%m-%d %H:%M:%S") + "'></div>";
104166
result += new_node
105167
return result
106168

107-
108-
def draw_thread_bar(start, total_duration, nodes, space_between_minutes, minute_scale):
169+
def draw_thread_bar(threads,space_between_minutes, minute_scale):
109170
result = "<p class='time' style='top:198px;left:900px;'>Threads</p>"
110171

111-
total = total_duration/60
112-
thread = [0 for x in range(total)]
113-
114-
now = start
115-
116-
#calculate nuber of threads in every second
117-
for i in range(total):
118-
node_start = None
119-
node_finish = None
120-
121-
for j in range(i, len(nodes)):
122-
node_start = parser.parse(nodes[j]['start'])
123-
node_finish = parser.parse(nodes[j]['finish'])
124-
125-
if node_start <= now and node_finish >= now:
126-
thread[i] += nodes[j]['num_threads']
127-
if node_start > now:
128-
break
129-
now += datetime.timedelta(minutes=1)
130-
131-
132-
#draw thread bar
133172
scale = float(space_between_minutes/float(minute_scale))
134-
135-
for i in range(len(thread)):
136-
width = thread[i] * 10
137-
t = (i*scale*minute_scale) + 220
138-
bar = "<div class='bar' style='height:" + str(space_between_minutes) + \
139-
"px;width:" + str(width) + "px;left:900px;top:"+str(t)+"px'></div>"
173+
space_between_minutes = float(space_between_minutes/60.0)
174+
for i in range(len(threads)):
175+
width = threads[i] * 10
176+
t = (float(i*scale*minute_scale)/60.0) + 220
177+
bar = "<div class='bar' style='height:"+ str(space_between_minutes) + "px;width:"+ str(width) +"px;left:900px;top:"+str(t)+"px'></div>"
140178
result += bar
141179

142180
return result
143181

144-
145-
def draw_memory_bar(start, total_duration, nodes,
146-
space_between_minutes, minute_scale):
182+
def draw_memory_bar(memory, space_between_minutes, minute_scale):
147183
result = "<p class='time' style='top:198px;left:1200px;'>Memory</p>"
148184

149-
total = total_duration/60
150-
memory = [0 for x in range(total)]
151-
152-
now = start
153-
154-
#calculate nuber of threads in every second
155-
for i in range(total):
156-
node_start = None
157-
node_finish = None
158-
159-
for j in range(i, len(nodes)):
160-
node_start = parser.parse(nodes[j]['start'])
161-
node_finish = parser.parse(nodes[j]['finish'])
162-
163-
if node_start <= now and node_finish >= now:
164-
memory[i] += nodes[j]['estimated_memory_gb']
165-
if node_start > now:
166-
break
167-
now += datetime.timedelta(minutes=1)
168-
169-
170-
#draw thread bar
171185
scale = float(space_between_minutes/float(minute_scale))
186+
space_between_minutes = float(space_between_minutes/60.0)
172187

173188
for i in range(len(memory)):
174189
width = memory[i] * 10
175-
t = (i*scale*minute_scale) + 220
176-
bar = "<div class='bar' style='height:" + str(space_between_minutes) + \
177-
"px;width:" + str(width) + "px;left:1200px;top:"+str(t)+"px'></div>"
190+
t = (float(i*scale*minute_scale)/60.0) + 220
191+
bar = "<div class='bar' style='height:"+ str(space_between_minutes) + "px;width:"+ str(width) +"px;left:1200px;top:"+str(t)+"px'></div>"
178192
result += bar
179193

180194
return result
@@ -207,7 +221,7 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
207221
# generate_gantt_chart('callback.log', 8)
208222
'''
209223

210-
result, last_node = log_to_json(logfile)
224+
result, last_node = log_to_dict(logfile)
211225
scale = space_between_minutes
212226

213227
#add the html header
@@ -262,24 +276,22 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
262276

263277

264278
#create the header of the report with useful information
265-
start = parser.parse(result[0]['start'])
266-
duration = int((parser.parse(last_node['finish']) - start).total_seconds())
279+
start = result[0]['start']
280+
duration = (last_node['finish'] - start).total_seconds()
267281

268-
html_string += '<p>Start: '+ result[0]['start'] +'</p>'
269-
html_string += '<p>Finish: '+ last_node['finish'] +'</p>'
282+
html_string += '<p>Start: '+ result[0]['start'].strftime("%Y-%m-%d %H:%M:%S") +'</p>'
283+
html_string += '<p>Finish: '+ last_node['finish'].strftime("%Y-%m-%d %H:%M:%S") +'</p>'
270284
html_string += '<p>Duration: '+ str(duration/60) +' minutes</p>'
271285
html_string += '<p>Nodes: '+str(len(result))+'</p>'
272286
html_string += '<p>Cores: '+str(cores)+'</p>'
273287

288+
result = log_to_events(logfile)
289+
threads = calculate_resources(result, 'num_threads')
290+
html_string += draw_thread_bar(threads, space_between_minutes, minute_scale)
274291

275-
#draw lines
276-
html_string += draw_lines(start, duration, minute_scale, scale)
277-
278-
#draw nodes
279-
html_string += draw_nodes(start, result, cores, scale, colors)
292+
memory = calculate_resources(result, 'estimated_memory_gb')
293+
html_string += draw_memory_bar(memory, space_between_minutes, minute_scale)
280294

281-
#html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale)
282-
#html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale)
283295

284296
#finish html
285297
html_string+= '''
@@ -289,4 +301,4 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
289301
#save file
290302
html_file = open(logfile +'.html', 'wb')
291303
html_file.write(html_string)
292-
html_file.close()
304+
html_file.close()

0 commit comments

Comments
 (0)