Improve thread and memory bar drawing: compute per-second resource usage from log events

carolFrohlich · carolFrohlich · commit e1d19cbdf10b · 2016-03-08T17:17:56.000-05:00
diff --git a/nipype/utils/draw_gantt_chart.py b/nipype/utils/draw_gantt_chart.py
@@ -4,50 +4,118 @@
 callback_log.log_nodes_cb()
 """
 
+# Import packages
 # Import packages
 import json
 from dateutil import parser
 import datetime
 import random
+import pandas as pd
+import dateutil
+from collections import OrderedDict
 
 
-def log_to_json(logfile):
-    result = []
+def log_to_events(logfile):
+    events = []
     with open(logfile, 'r') as content:
-
         #read file separating each line
         content = content.read()
         lines = content.split('\n')
-        l = []
-        for i in lines:
+
+        for l in lines:
+            event = None
             try:
-                y = json.loads(i)
-                l.append(y)
-            except Exception:
+                event = json.loads(l)
+            except Exception, e:
                 pass
 
-        lines = l
+            if not event: continue
+
+            if 'start' in event:
+                event['type'] = 'start'
+                event['time'] = event['start']
+            else:
+                event['type'] = 'finish'
+                event['time'] = event['finish']
+
+            events.append(event)
+    return events
+
+def log_to_dict(logfile):
+
+    #keep track of important vars
+    nodes = [] #all the parsed nodes
+    unifinished_nodes = [] #all start nodes that dont have a finish yet
+
+    with open(logfile, 'r') as content:
+
+        #read file separating each line
+        content = content.read()
+        lines = content.split('\n')
 
-        last_node = [ x for x in lines if x.has_key('finish')][-1]
+        for l in lines:
+            #try to parse each line as JSON into a dict.
+            #if the line is malformed, just skip it
+            node = None
+            try:
+                node = json.loads(l)
+            except Exception, e:
+                pass
 
-        for i, line in enumerate(lines):
-            #get first start it finds
-            if not line.has_key('start'):
+            if not node: 
                 continue
 
-            #fint the end node for that start
-            for j in range(i+1, len(lines)):
-                if lines[j].has_key('finish'):
-                    if lines[j]['id'] == line['id'] and \
-                       lines[j]['name'] == line['name']:
-                        line['finish'] = lines[j]['finish']
-                        line['duration'] = (parser.parse(line['finish']) - \
-                                            parser.parse(line['start'])).total_seconds()
-                        result.append(line)
+            #if it is a start node, add it to the unfinished nodes
+            if 'start' in node:
+                node['start'] = parser.parse(node['start'])
+                unifinished_nodes.append(node)
+
+            #if it is an end node, look in the unfinished nodes for the matching start,
+            #then remove that start from the unfinished list and add the node to the node list
+            elif 'finish' in node:
+                node['finish'] = parser.parse(node['finish'])
+                #most nodes are short, so a backwards search was intended; NOTE: this loop actually scans forwards from index 0
+                for s in range(len(unifinished_nodes)):
+                    aux = unifinished_nodes[s]
+                    #found the end for node start, copy over info
+                    if aux['id'] == node['id'] and aux['name'] == node['name'] and aux['start'] < node['finish']:
+                        node['start'] = aux['start']
+                        node['duration'] = (node['finish'] - node['start']).total_seconds()
+
+                        unifinished_nodes.remove(aux)
+                        nodes.append(node)
                         break
 
-    return result, last_node
-
+        #finished parsing
+        #assume nodes without a finish event didn't finish running.
+        #set their finish time to that of the last node that finished
+        last_node = nodes[-1]
+        for n in unifinished_nodes:
+            n['finish'] = last_node['finish']
+            n['duration'] = (n['finish'] - n['start']).total_seconds()
+            nodes.append(n)
+
+        return nodes, last_node
+
+def calculate_resources(events, resource):
+    res = OrderedDict()
+    for event in events:
+        all_res = 0
+        if event['type'] == "start":
+            all_res =+ int(float(event[resource]))
+            current_time = event['start'];
+        elif event['type'] == "finish":
+            all_res+ int(float(event[resource]))
+            current_time = event['finish'];
+
+        res[current_time] = all_res
+
+    timestamps = [dateutil.parser.parse(ts) for ts in res.keys()]
+    time_series = pd.Series(res.values(), timestamps)
+    interp_seq = pd.date_range(time_series.index[0], time_series.index[-1], freq='S')
+    interp_time_series = time_series.reindex(interp_seq)
+    interp_time_series = interp_time_series.fillna(method='ffill')
+    return interp_time_series
 
 #total duration in seconds
 def draw_lines(start, total_duration, minute_scale, scale):
@@ -73,8 +141,8 @@ def draw_nodes(start, nodes, cores, scale, colors):
     end_times = [datetime.datetime(start.year, start.month, start.day, start.hour, start.minute, start.second) for x in range(cores)]
 
     for node in nodes:
-        node_start = parser.parse(node['start'])
-        node_finish = parser.parse(node['finish'])
+        node_start = node['start']
+        node_finish = node['finish']
         offset = ((node_start - start).total_seconds() / 60) * scale + 220
         scale_duration = (node['duration'] / 60) * scale
         if scale_duration < 5:
@@ -93,88 +161,34 @@ def draw_nodes(start, nodes, cores, scale, colors):
                                                  node_finish.second)
                 #end_times[j]+=  datetime.timedelta(microseconds=node_finish.microsecond)
                 break
-
-        color = random.choice(colors)
-        new_node = "<div class='node' style=' left:" + str(left) + \
-                   "px;top: " + str(offset) + "px;height:" + \
-                   str(scale_duration) + "px; background-color: " + color + \
-                   " 'title='" + node['name'] +'\nduration: ' + \
-                   str(node['duration']/60) + '\nstart: ' + node['start'] + \
-                   '\nend: ' + node['finish'] + "'></div>";
+        color = random.choice(colors)  
+        new_node = "<div class='node' style=' left:" + str(left) + "px;top: " + str(offset) + "px;height:" + str(scale_duration) + "px; background-color: " + color  + " 'title='" + node['name'] +'\nduration: ' + str(node['duration']/60) + '\nstart: ' + node['start'].strftime("%Y-%m-%d %H:%M:%S") + '\nend: ' + node['finish'].strftime("%Y-%m-%d %H:%M:%S") + "'></div>";
         result += new_node
     return result
 
-
-def draw_thread_bar(start, total_duration, nodes, space_between_minutes, minute_scale):
+def draw_thread_bar(threads,space_between_minutes, minute_scale):
     result = "<p class='time' style='top:198px;left:900px;'>Threads</p>"
 
-    total = total_duration/60
-    thread = [0 for x in range(total)]
-
-    now = start
-
-    #calculate nuber of threads in every second
-    for i in range(total):
-        node_start = None
-        node_finish = None
-
-        for j in range(i, len(nodes)):
-            node_start = parser.parse(nodes[j]['start'])
-            node_finish = parser.parse(nodes[j]['finish'])
-
-            if node_start <= now and node_finish >= now:
-                thread[i] += nodes[j]['num_threads']
-            if node_start > now:
-                break
-        now += datetime.timedelta(minutes=1)
-
-
-    #draw thread bar
     scale = float(space_between_minutes/float(minute_scale))
-
-    for i in range(len(thread)):
-        width = thread[i] * 10
-        t = (i*scale*minute_scale) + 220
-        bar = "<div class='bar' style='height:" + str(space_between_minutes) + \
-              "px;width:" + str(width) + "px;left:900px;top:"+str(t)+"px'></div>"
+    space_between_minutes = float(space_between_minutes/60.0)
+    for i in range(len(threads)):
+        width = threads[i] * 10
+        t = (float(i*scale*minute_scale)/60.0) + 220
+        bar = "<div class='bar' style='height:"+ str(space_between_minutes) + "px;width:"+ str(width) +"px;left:900px;top:"+str(t)+"px'></div>"
         result += bar
 
     return result
 
-
-def draw_memory_bar(start, total_duration, nodes,
-                    space_between_minutes, minute_scale):
+def draw_memory_bar(memory, space_between_minutes, minute_scale):
     result = "<p class='time' style='top:198px;left:1200px;'>Memory</p>"
 
-    total = total_duration/60
-    memory = [0 for x in range(total)]
-
-    now = start
-
-    #calculate nuber of threads in every second
-    for i in range(total):
-        node_start = None
-        node_finish = None
-
-        for j in range(i, len(nodes)):
-            node_start = parser.parse(nodes[j]['start'])
-            node_finish = parser.parse(nodes[j]['finish'])
-
-            if node_start <= now and node_finish >= now:
-                memory[i] += nodes[j]['estimated_memory_gb']
-            if node_start > now:
-                break
-        now += datetime.timedelta(minutes=1)
-
-
-    #draw thread bar
     scale = float(space_between_minutes/float(minute_scale))
+    space_between_minutes = float(space_between_minutes/60.0)
 
     for i in range(len(memory)):
         width = memory[i] * 10
-        t = (i*scale*minute_scale) + 220
-        bar = "<div class='bar' style='height:" + str(space_between_minutes) + \
-              "px;width:" + str(width) + "px;left:1200px;top:"+str(t)+"px'></div>"
+        t = (float(i*scale*minute_scale)/60.0) + 220
+        bar = "<div class='bar' style='height:"+ str(space_between_minutes) + "px;width:"+ str(width) +"px;left:1200px;top:"+str(t)+"px'></div>"
         result += bar
 
     return result
@@ -207,7 +221,7 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
     # generate_gantt_chart('callback.log', 8)
     '''
 
-    result, last_node = log_to_json(logfile)
+    result, last_node = log_to_dict(logfile)
     scale = space_between_minutes 
 
     #add the html header
@@ -262,24 +276,22 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
 
 
     #create the header of the report with useful information
-    start = parser.parse(result[0]['start'])
-    duration = int((parser.parse(last_node['finish']) - start).total_seconds())
+    start = result[0]['start']
+    duration = (last_node['finish'] - start).total_seconds()
 
-    html_string += '<p>Start: '+ result[0]['start'] +'</p>'
-    html_string += '<p>Finish: '+ last_node['finish'] +'</p>'
+    html_string += '<p>Start: '+ result[0]['start'].strftime("%Y-%m-%d %H:%M:%S") +'</p>'
+    html_string += '<p>Finish: '+ last_node['finish'].strftime("%Y-%m-%d %H:%M:%S") +'</p>'
     html_string += '<p>Duration: '+ str(duration/60) +' minutes</p>'
     html_string += '<p>Nodes: '+str(len(result))+'</p>'
     html_string += '<p>Cores: '+str(cores)+'</p>'
 
+    result = log_to_events(logfile)
+    threads = calculate_resources(result, 'num_threads')
+    html_string += draw_thread_bar(threads, space_between_minutes, minute_scale)
 
-    #draw lines
-    html_string += draw_lines(start, duration, minute_scale, scale)
-
-    #draw nodes
-    html_string += draw_nodes(start, result, cores, scale, colors)
+    memory = calculate_resources(result, 'estimated_memory_gb')
+    html_string += draw_memory_bar(memory, space_between_minutes, minute_scale)
 
-    #html_string += draw_thread_bar(start, duration, result, space_between_minutes, minute_scale)
-    #html_string += draw_memory_bar(start, duration, result, space_between_minutes, minute_scale)
 
     #finish html
     html_string+= '''
@@ -289,4 +301,4 @@ def generate_gantt_chart(logfile, cores, minute_scale=10,
     #save file
     html_file = open(logfile +'.html', 'wb')
     html_file.write(html_string)
-    html_file.close()
+    html_file.close()