4
4
callback_log.log_nodes_cb()
5
5
"""
6
6
7
+ # Import packages
7
8
# Import packages
8
9
import json
9
10
from dateutil import parser
10
11
import datetime
11
12
import random
13
+ import pandas as pd
14
+ import dateutil
15
+ from collections import OrderedDict
12
16
13
17
14
- def log_to_json (logfile ):
15
- result = []
18
+ def log_to_events (logfile ):
19
+ events = []
16
20
with open (logfile , 'r' ) as content :
17
-
18
21
#read file separating each line
19
22
content = content .read ()
20
23
lines = content .split ('\n ' )
21
- l = []
22
- for i in lines :
24
+
25
+ for l in lines :
26
+ event = None
23
27
try :
24
- y = json .loads (i )
25
- l .append (y )
26
- except Exception :
28
+ event = json .loads (l )
29
+ except Exception , e :
27
30
pass
28
31
29
- lines = l
32
+ if not event : continue
33
+
34
+ if 'start' in event :
35
+ event ['type' ] = 'start'
36
+ event ['time' ] = event ['start' ]
37
+ else :
38
+ event ['type' ] = 'finish'
39
+ event ['time' ] = event ['finish' ]
40
+
41
+ events .append (event )
42
+ return events
43
+
44
+ def log_to_dict (logfile ):
45
+
46
+ #keep track of important vars
47
+ nodes = [] #all the parsed nodes
48
+ unifinished_nodes = [] #all start nodes that dont have a finish yet
49
+
50
+ with open (logfile , 'r' ) as content :
51
+
52
+ #read file separating each line
53
+ content = content .read ()
54
+ lines = content .split ('\n ' )
30
55
31
- last_node = [ x for x in lines if x .has_key ('finish' )][- 1 ]
56
+ for l in lines :
57
+ #try to parse each line and transform in a json dict.
58
+ #if the line has a bad format, just skip
59
+ node = None
60
+ try :
61
+ node = json .loads (l )
62
+ except Exception , e :
63
+ pass
32
64
33
- for i , line in enumerate (lines ):
34
- #get first start it finds
35
- if not line .has_key ('start' ):
65
+ if not node :
36
66
continue
37
67
38
- #fint the end node for that start
39
- for j in range (i + 1 , len (lines )):
40
- if lines [j ].has_key ('finish' ):
41
- if lines [j ]['id' ] == line ['id' ] and \
42
- lines [j ]['name' ] == line ['name' ]:
43
- line ['finish' ] = lines [j ]['finish' ]
44
- line ['duration' ] = (parser .parse (line ['finish' ]) - \
45
- parser .parse (line ['start' ])).total_seconds ()
46
- result .append (line )
68
+ #if it is a start node, add to unifinished nodes
69
+ if 'start' in node :
70
+ node ['start' ] = parser .parse (node ['start' ])
71
+ unifinished_nodes .append (node )
72
+
73
+ #if it is end node, look in uninished nodes for matching start
74
+ #remove from unifinished list and add to node list
75
+ elif 'finish' in node :
76
+ node ['finish' ] = parser .parse (node ['finish' ])
77
+ #because most nodes are small, we look backwards in the unfinished list
78
+ for s in range (len (unifinished_nodes )):
79
+ aux = unifinished_nodes [s ]
80
+ #found the end for node start, copy over info
81
+ if aux ['id' ] == node ['id' ] and aux ['name' ] == node ['name' ] and aux ['start' ] < node ['finish' ]:
82
+ node ['start' ] = aux ['start' ]
83
+ node ['duration' ] = (node ['finish' ] - node ['start' ]).total_seconds ()
84
+
85
+ unifinished_nodes .remove (aux )
86
+ nodes .append (node )
47
87
break
48
88
49
- return result , last_node
50
-
89
+ #finished parsing
90
+ #assume nodes without finish didn't finish running.
91
+ #set their finish to last node run
92
+ last_node = nodes [- 1 ]
93
+ for n in unifinished_nodes :
94
+ n ['finish' ] = last_node ['finish' ]
95
+ n ['duration' ] = (n ['finish' ] - n ['start' ]).total_seconds ()
96
+ nodes .append (n )
97
+
98
+ return nodes , last_node
99
+
100
+ def calculate_resources (events , resource ):
101
+ res = OrderedDict ()
102
+ for event in events :
103
+ all_res = 0
104
+ if event ['type' ] == "start" :
105
+ all_res = + int (float (event [resource ]))
106
+ current_time = event ['start' ];
107
+ elif event ['type' ] == "finish" :
108
+ all_res + int (float (event [resource ]))
109
+ current_time = event ['finish' ];
110
+
111
+ res [current_time ] = all_res
112
+
113
+ timestamps = [dateutil .parser .parse (ts ) for ts in res .keys ()]
114
+ time_series = pd .Series (res .values (), timestamps )
115
+ interp_seq = pd .date_range (time_series .index [0 ], time_series .index [- 1 ], freq = 'S' )
116
+ interp_time_series = time_series .reindex (interp_seq )
117
+ interp_time_series = interp_time_series .fillna (method = 'ffill' )
118
+ return interp_time_series
51
119
52
120
#total duration in seconds
53
121
def draw_lines (start , total_duration , minute_scale , scale ):
@@ -73,8 +141,8 @@ def draw_nodes(start, nodes, cores, scale, colors):
73
141
end_times = [datetime .datetime (start .year , start .month , start .day , start .hour , start .minute , start .second ) for x in range (cores )]
74
142
75
143
for node in nodes :
76
- node_start = parser . parse ( node ['start' ])
77
- node_finish = parser . parse ( node ['finish' ])
144
+ node_start = node ['start' ]
145
+ node_finish = node ['finish' ]
78
146
offset = ((node_start - start ).total_seconds () / 60 ) * scale + 220
79
147
scale_duration = (node ['duration' ] / 60 ) * scale
80
148
if scale_duration < 5 :
@@ -93,88 +161,34 @@ def draw_nodes(start, nodes, cores, scale, colors):
93
161
node_finish .second )
94
162
#end_times[j]+= datetime.timedelta(microseconds=node_finish.microsecond)
95
163
break
96
-
97
- color = random .choice (colors )
98
- new_node = "<div class='node' style=' left:" + str (left ) + \
99
- "px;top: " + str (offset ) + "px;height:" + \
100
- str (scale_duration ) + "px; background-color: " + color + \
101
- " 'title='" + node ['name' ] + '\n duration: ' + \
102
- str (node ['duration' ]/ 60 ) + '\n start: ' + node ['start' ] + \
103
- '\n end: ' + node ['finish' ] + "'></div>" ;
164
+ color = random .choice (colors )
165
+ new_node = "<div class='node' style=' left:" + str (left ) + "px;top: " + str (offset ) + "px;height:" + str (scale_duration ) + "px; background-color: " + color + " 'title='" + node ['name' ] + '\n duration: ' + str (node ['duration' ]/ 60 ) + '\n start: ' + node ['start' ].strftime ("%Y-%m-%d %H:%M:%S" ) + '\n end: ' + node ['finish' ].strftime ("%Y-%m-%d %H:%M:%S" ) + "'></div>" ;
104
166
result += new_node
105
167
return result
106
168
107
-
108
- def draw_thread_bar (start , total_duration , nodes , space_between_minutes , minute_scale ):
169
+ def draw_thread_bar (threads ,space_between_minutes , minute_scale ):
109
170
result = "<p class='time' style='top:198px;left:900px;'>Threads</p>"
110
171
111
- total = total_duration / 60
112
- thread = [0 for x in range (total )]
113
-
114
- now = start
115
-
116
- #calculate nuber of threads in every second
117
- for i in range (total ):
118
- node_start = None
119
- node_finish = None
120
-
121
- for j in range (i , len (nodes )):
122
- node_start = parser .parse (nodes [j ]['start' ])
123
- node_finish = parser .parse (nodes [j ]['finish' ])
124
-
125
- if node_start <= now and node_finish >= now :
126
- thread [i ] += nodes [j ]['num_threads' ]
127
- if node_start > now :
128
- break
129
- now += datetime .timedelta (minutes = 1 )
130
-
131
-
132
- #draw thread bar
133
172
scale = float (space_between_minutes / float (minute_scale ))
134
-
135
- for i in range (len (thread )):
136
- width = thread [i ] * 10
137
- t = (i * scale * minute_scale ) + 220
138
- bar = "<div class='bar' style='height:" + str (space_between_minutes ) + \
139
- "px;width:" + str (width ) + "px;left:900px;top:" + str (t )+ "px'></div>"
173
+ space_between_minutes = float (space_between_minutes / 60.0 )
174
+ for i in range (len (threads )):
175
+ width = threads [i ] * 10
176
+ t = (float (i * scale * minute_scale )/ 60.0 ) + 220
177
+ bar = "<div class='bar' style='height:" + str (space_between_minutes ) + "px;width:" + str (width ) + "px;left:900px;top:" + str (t )+ "px'></div>"
140
178
result += bar
141
179
142
180
return result
143
181
144
-
145
- def draw_memory_bar (start , total_duration , nodes ,
146
- space_between_minutes , minute_scale ):
182
+ def draw_memory_bar (memory , space_between_minutes , minute_scale ):
147
183
result = "<p class='time' style='top:198px;left:1200px;'>Memory</p>"
148
184
149
- total = total_duration / 60
150
- memory = [0 for x in range (total )]
151
-
152
- now = start
153
-
154
- #calculate nuber of threads in every second
155
- for i in range (total ):
156
- node_start = None
157
- node_finish = None
158
-
159
- for j in range (i , len (nodes )):
160
- node_start = parser .parse (nodes [j ]['start' ])
161
- node_finish = parser .parse (nodes [j ]['finish' ])
162
-
163
- if node_start <= now and node_finish >= now :
164
- memory [i ] += nodes [j ]['estimated_memory_gb' ]
165
- if node_start > now :
166
- break
167
- now += datetime .timedelta (minutes = 1 )
168
-
169
-
170
- #draw thread bar
171
185
scale = float (space_between_minutes / float (minute_scale ))
186
+ space_between_minutes = float (space_between_minutes / 60.0 )
172
187
173
188
for i in range (len (memory )):
174
189
width = memory [i ] * 10
175
- t = (i * scale * minute_scale ) + 220
176
- bar = "<div class='bar' style='height:" + str (space_between_minutes ) + \
177
- "px;width:" + str (width ) + "px;left:1200px;top:" + str (t )+ "px'></div>"
190
+ t = (float (i * scale * minute_scale )/ 60.0 ) + 220
191
+ bar = "<div class='bar' style='height:" + str (space_between_minutes ) + "px;width:" + str (width ) + "px;left:1200px;top:" + str (t )+ "px'></div>"
178
192
result += bar
179
193
180
194
return result
@@ -207,7 +221,7 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
207
221
# generate_gantt_chart('callback.log', 8)
208
222
'''
209
223
210
- result , last_node = log_to_json (logfile )
224
+ result , last_node = log_to_dict (logfile )
211
225
scale = space_between_minutes
212
226
213
227
#add the html header
@@ -262,24 +276,22 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
262
276
263
277
264
278
#create the header of the report with useful information
265
- start = parser . parse ( result [0 ]['start' ])
266
- duration = int (( parser . parse ( last_node ['finish' ]) - start ).total_seconds () )
279
+ start = result [0 ]['start' ]
280
+ duration = ( last_node ['finish' ] - start ).total_seconds ()
267
281
268
- html_string += '<p>Start: ' + result [0 ]['start' ] + '</p>'
269
- html_string += '<p>Finish: ' + last_node ['finish' ] + '</p>'
282
+ html_string += '<p>Start: ' + result [0 ]['start' ]. strftime ( "%Y-%m-%d %H:%M:%S" ) + '</p>'
283
+ html_string += '<p>Finish: ' + last_node ['finish' ]. strftime ( "%Y-%m-%d %H:%M:%S" ) + '</p>'
270
284
html_string += '<p>Duration: ' + str (duration / 60 ) + ' minutes</p>'
271
285
html_string += '<p>Nodes: ' + str (len (result ))+ '</p>'
272
286
html_string += '<p>Cores: ' + str (cores )+ '</p>'
273
287
288
+ result = log_to_events (logfile )
289
+ threads = calculate_resources (result , 'num_threads' )
290
+ html_string += draw_thread_bar (threads , space_between_minutes , minute_scale )
274
291
275
- #draw lines
276
- html_string += draw_lines (start , duration , minute_scale , scale )
277
-
278
- #draw nodes
279
- html_string += draw_nodes (start , result , cores , scale , colors )
292
+ memory = calculate_resources (result , 'estimated_memory_gb' )
293
+ html_string += draw_memory_bar (memory , space_between_minutes , minute_scale )
280
294
281
- #html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale)
282
- #html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale)
283
295
284
296
#finish html
285
297
html_string += '''
@@ -289,4 +301,4 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
289
301
#save file
290
302
html_file = open (logfile + '.html' , 'wb' )
291
303
html_file .write (html_string )
292
- html_file .close ()
304
+ html_file .close ()
0 commit comments