4
4
callback_log.log_nodes_cb()
5
5
"""
6
6
7
+ # Import packages
7
8
# Import packages
8
9
import json
9
10
from dateutil import parser
10
11
import datetime
11
12
import random
13
+ import pandas as pd
14
+ import dateutil
15
+ from collections import OrderedDict
12
16
13
17
14
- def log_to_json (logfile ):
15
- result = []
18
+ def log_to_events (logfile ):
19
+ events = []
16
20
with open (logfile , 'r' ) as content :
17
-
18
21
#read file separating each line
19
22
content = content .read ()
20
23
lines = content .split ('\n ' )
21
- l = []
22
- for i in lines :
24
+
25
+ for l in lines :
26
+ event = None
23
27
try :
24
- y = json .loads (i )
25
- l .append (y )
26
- except Exception :
28
+ event = json .loads (l )
29
+ except Exception , e :
27
30
pass
28
31
29
- lines = l
32
+ if not event : continue
33
+
34
+ if 'start' in event :
35
+ event ['type' ] = 'start'
36
+ event ['time' ] = event ['start' ]
37
+ else :
38
+ event ['type' ] = 'finish'
39
+ event ['time' ] = event ['finish' ]
40
+
41
+ events .append (event )
42
+ return events
30
43
31
- last_node = [ x for x in lines if x . has_key ( 'finish' )][ - 1 ]
44
+ def log_to_dict ( logfile ):
32
45
33
- for i , line in enumerate (lines ):
34
- #get first start it finds
35
- if not line .has_key ('start' ):
46
+ #keep track of important vars
47
+ nodes = [] #all the parsed nodes
48
+ unifinished_nodes = [] #all start nodes that dont have a finish yet
49
+
50
+ with open (logfile , 'r' ) as content :
51
+
52
+ #read file separating each line
53
+ content = content .read ()
54
+ lines = content .split ('\n ' )
55
+
56
+ for l in lines :
57
+ #try to parse each line and transform in a json dict.
58
+ #if the line has a bad format, just skip
59
+ node = None
60
+ try :
61
+ node = json .loads (l )
62
+ except Exception , e :
63
+ pass
64
+
65
+ if not node :
36
66
continue
37
67
38
- #fint the end node for that start
39
- for j in range (i + 1 , len (lines )):
40
- if lines [j ].has_key ('finish' ):
41
- if lines [j ]['id' ] == line ['id' ] and \
42
- lines [j ]['name' ] == line ['name' ]:
43
- line ['finish' ] = lines [j ]['finish' ]
44
- line ['duration' ] = (parser .parse (line ['finish' ]) - \
45
- parser .parse (line ['start' ])).total_seconds ()
46
- result .append (line )
68
+ #if it is a start node, add to unifinished nodes
69
+ if 'start' in node :
70
+ node ['start' ] = parser .parse (node ['start' ])
71
+ unifinished_nodes .append (node )
72
+
73
+ #if it is end node, look in uninished nodes for matching start
74
+ #remove from unifinished list and add to node list
75
+ elif 'finish' in node :
76
+ node ['finish' ] = parser .parse (node ['finish' ])
77
+ #because most nodes are small, we look backwards in the unfinished list
78
+ for s in range (len (unifinished_nodes )):
79
+ aux = unifinished_nodes [s ]
80
+ #found the end for node start, copy over info
81
+ if aux ['id' ] == node ['id' ] and aux ['name' ] == node ['name' ] and aux ['start' ] < node ['finish' ]:
82
+ node ['start' ] = aux ['start' ]
83
+ node ['duration' ] = (node ['finish' ] - node ['start' ]).total_seconds ()
84
+
85
+ unifinished_nodes .remove (aux )
86
+ nodes .append (node )
47
87
break
48
88
49
- return result , last_node
50
-
89
+ #finished parsing
90
+ #assume nodes without finish didn't finish running.
91
+ #set their finish to last node run
92
+ last_node = nodes [- 1 ]
93
+ for n in unifinished_nodes :
94
+ n ['finish' ] = last_node ['finish' ]
95
+ n ['duration' ] = (n ['finish' ] - n ['start' ]).total_seconds ()
96
+ nodes .append (n )
97
+
98
+ return nodes , last_node
99
+
100
+ def calculate_resources (events , resource ):
101
+ res = OrderedDict ()
102
+ for event in events :
103
+ all_res = 0
104
+ if event ['type' ] == "start" :
105
+ all_res += int (float (event [resource ]))
106
+ current_time = event ['start' ];
107
+ elif event ['type' ] == "finish" :
108
+ all_res += int (float (event [resource ]))
109
+ current_time = event ['finish' ];
110
+
111
+ res [current_time ] = all_res
112
+
113
+ timestamps = [dateutil .parser .parse (ts ) for ts in res .keys ()]
114
+ time_series = pd .Series (res .values (), timestamps )
115
+ interp_seq = pd .date_range (time_series .index [0 ], time_series .index [- 1 ], freq = 'S' )
116
+ interp_time_series = time_series .reindex (interp_seq )
117
+ interp_time_series = interp_time_series .fillna (method = 'ffill' )
118
+ return interp_time_series
51
119
52
120
#total duration in seconds
53
121
def draw_lines (start , total_duration , minute_scale , scale ):
@@ -68,15 +136,18 @@ def draw_lines(start, total_duration, minute_scale, scale):
68
136
return result
69
137
70
138
71
- def draw_nodes (start , nodes , cores , scale , colors ):
139
+ def draw_nodes (start , nodes , cores , minute_scale , space_between_minutes , colors ):
72
140
result = ''
73
141
end_times = [datetime .datetime (start .year , start .month , start .day , start .hour , start .minute , start .second ) for x in range (cores )]
74
142
143
+ scale = float (space_between_minutes / float (minute_scale ))
144
+ space_between_minutes = float (space_between_minutes / scale )
145
+
75
146
for node in nodes :
76
- node_start = parser . parse ( node ['start' ])
77
- node_finish = parser . parse ( node ['finish' ])
78
- offset = ((node_start - start ).total_seconds () / 60 ) * scale + 220
79
- scale_duration = (node ['duration' ] / 60 ) * scale
147
+ node_start = node ['start' ]
148
+ node_finish = node ['finish' ]
149
+ offset = ((node_start - start ).total_seconds () / 60 ) * scale * space_between_minutes + 220
150
+ scale_duration = (node ['duration' ] / 60 ) * scale * space_between_minutes
80
151
if scale_duration < 5 :
81
152
scale_duration = 5
82
153
@@ -91,90 +162,40 @@ def draw_nodes(start, nodes, cores, scale, colors):
91
162
node_finish .hour ,
92
163
node_finish .minute ,
93
164
node_finish .second )
94
- #end_times[j]+= datetime.timedelta(microseconds=node_finish.microsecond)
95
- break
96
165
97
- color = random .choice (colors )
98
- new_node = "<div class='node' style=' left:" + str (left ) + \
99
- "px;top: " + str (offset ) + "px;height:" + \
100
- str (scale_duration ) + "px; background-color: " + color + \
101
- " 'title='" + node ['name' ] + '\n duration: ' + \
102
- str (node ['duration' ]/ 60 ) + '\n start: ' + node ['start' ] + \
103
- '\n end: ' + node ['finish' ] + "'></div>" ;
166
+ break
167
+ color = random .choice (colors )
168
+ n_start = node ['start' ].strftime ("%Y-%m-%d %H:%M:%S" )
169
+ n_finish = node ['finish' ].strftime ("%Y-%m-%d %H:%M:%S" )
170
+ n_dur = node ['duration' ]/ 60
171
+ new_node = "<div class='node' style='left:%spx;top:%spx;height:%spx;background-color:%s;'title='%s\n duration:%s\n start:%s\n end:%s'></div>" % (left , offset , scale_duration , color , node ['name' ], n_dur , n_start , n_finish )
104
172
result += new_node
105
- return result
106
173
174
+ return result
107
175
108
- def draw_thread_bar (start , total_duration , nodes , space_between_minutes , minute_scale ):
176
+ def draw_thread_bar (threads , space_between_minutes , minute_scale ):
109
177
result = "<p class='time' style='top:198px;left:900px;'>Threads</p>"
110
178
111
- total = total_duration / 60
112
- thread = [0 for x in range (total )]
113
-
114
- now = start
115
-
116
- #calculate nuber of threads in every second
117
- for i in range (total ):
118
- node_start = None
119
- node_finish = None
120
-
121
- for j in range (i , len (nodes )):
122
- node_start = parser .parse (nodes [j ]['start' ])
123
- node_finish = parser .parse (nodes [j ]['finish' ])
124
-
125
- if node_start <= now and node_finish >= now :
126
- thread [i ] += nodes [j ]['num_threads' ]
127
- if node_start > now :
128
- break
129
- now += datetime .timedelta (minutes = 1 )
130
-
131
-
132
- #draw thread bar
133
179
scale = float (space_between_minutes / float (minute_scale ))
134
-
135
- for i in range (len (thread )):
136
- width = thread [i ] * 10
137
- t = (i * scale * minute_scale ) + 220
138
- bar = "<div class='bar' style='height:" + str (space_between_minutes ) + \
139
- "px;width:" + str (width ) + "px;left:900px;top:" + str (t )+ "px'></div>"
180
+ space_between_minutes = float (space_between_minutes / 60.0 )
181
+ for i in range (len (threads )):
182
+ width = threads [i ] * 10
183
+ t = (float (i * scale * minute_scale )/ 60.0 ) + 220
184
+ bar = "<div class='bar' style='height:" + str (space_between_minutes ) + "px;width:" + str (width ) + "px;left:900px;top:" + str (t )+ "px'></div>"
140
185
result += bar
141
186
142
187
return result
143
188
144
-
145
- def draw_memory_bar (start , total_duration , nodes ,
146
- space_between_minutes , minute_scale ):
189
+ def draw_memory_bar (memory , space_between_minutes , minute_scale ):
147
190
result = "<p class='time' style='top:198px;left:1200px;'>Memory</p>"
148
191
149
- total = total_duration / 60
150
- memory = [0 for x in range (total )]
151
-
152
- now = start
153
-
154
- #calculate nuber of threads in every second
155
- for i in range (total ):
156
- node_start = None
157
- node_finish = None
158
-
159
- for j in range (i , len (nodes )):
160
- node_start = parser .parse (nodes [j ]['start' ])
161
- node_finish = parser .parse (nodes [j ]['finish' ])
162
-
163
- if node_start <= now and node_finish >= now :
164
- memory [i ] += nodes [j ]['estimated_memory_gb' ]
165
- if node_start > now :
166
- break
167
- now += datetime .timedelta (minutes = 1 )
168
-
169
-
170
- #draw thread bar
171
192
scale = float (space_between_minutes / float (minute_scale ))
193
+ space_between_minutes = float (space_between_minutes / 60.0 )
172
194
173
195
for i in range (len (memory )):
174
196
width = memory [i ] * 10
175
- t = (i * scale * minute_scale ) + 220
176
- bar = "<div class='bar' style='height:" + str (space_between_minutes ) + \
177
- "px;width:" + str (width ) + "px;left:1200px;top:" + str (t )+ "px'></div>"
197
+ t = (float (i * scale * minute_scale )/ 60.0 ) + 220
198
+ bar = "<div class='bar' style='height:" + str (space_between_minutes ) + "px;width:" + str (width ) + "px;left:1200px;top:" + str (t )+ "px'></div>"
178
199
result += bar
179
200
180
201
return result
@@ -207,7 +228,7 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
207
228
# generate_gantt_chart('callback.log', 8)
208
229
'''
209
230
210
- result , last_node = log_to_json (logfile )
231
+ result , last_node = log_to_dict (logfile )
211
232
scale = space_between_minutes
212
233
213
234
#add the html header
@@ -262,24 +283,25 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
262
283
263
284
264
285
#create the header of the report with useful information
265
- start = parser . parse ( result [0 ]['start' ])
266
- duration = int (( parser . parse ( last_node ['finish' ]) - start ).total_seconds () )
286
+ start = result [0 ]['start' ]
287
+ duration = ( last_node ['finish' ] - start ).total_seconds ()
267
288
268
- html_string += '<p>Start: ' + result [0 ]['start' ] + '</p>'
269
- html_string += '<p>Finish: ' + last_node ['finish' ] + '</p>'
270
- html_string += '<p>Duration: ' + str (duration / 60 ) + ' minutes</p>'
289
+ html_string += '<p>Start: ' + result [0 ]['start' ]. strftime ( "%Y-%m-%d %H:%M:%S" ) + '</p>'
290
+ html_string += '<p>Finish: ' + last_node ['finish' ]. strftime ( "%Y-%m-%d %H:%M:%S" ) + '</p>'
291
+ html_string += '<p>Duration: ' + "{0:.2f}" . format (duration / 60 ) + ' minutes</p>'
271
292
html_string += '<p>Nodes: ' + str (len (result ))+ '</p>'
272
293
html_string += '<p>Cores: ' + str (cores )+ '</p>'
273
294
295
+ html_string += draw_lines (start , duration , minute_scale , space_between_minutes )
296
+ html_string += draw_nodes (start , result , cores , minute_scale ,space_between_minutes , colors )
274
297
275
- #draw lines
276
- html_string += draw_lines (start , duration , minute_scale , scale )
298
+ result = log_to_events (logfile )
299
+ threads = calculate_resources (result , 'num_threads' )
300
+ html_string += draw_thread_bar (threads , space_between_minutes , minute_scale )
277
301
278
- #draw nodes
279
- html_string += draw_nodes ( start , result , cores , scale , colors )
302
+ memory = calculate_resources ( result , 'estimated_memory_gb' )
303
+ html_string += draw_memory_bar ( memory , space_between_minutes , minute_scale )
280
304
281
- #html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale)
282
- #html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale)
283
305
284
306
#finish html
285
307
html_string += '''
@@ -289,4 +311,4 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
289
311
#save file
290
312
html_file = open (logfile + '.html' , 'wb' )
291
313
html_file .write (html_string )
292
- html_file .close ()
314
+ html_file .close ()
0 commit comments