-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeedup_graphs.py
More file actions
158 lines (121 loc) · 5.95 KB
/
speedup_graphs.py
File metadata and controls
158 lines (121 loc) · 5.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
def parse_speedup_csv(filename):
    """Parse a two-column numeric CSV file into a list of rows.

    The first line is treated as a header and discarded. For every remaining
    line only the first two comma-separated fields are used: the first is
    converted with ``int`` and the second with ``float``; any further columns
    are ignored. Blank or whitespace-only lines (for example a trailing empty
    line at the end of the file) are skipped rather than raising.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of ``[int, float]`` lists, one per data line, in file order.
        An empty or header-only file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line's first two fields are not numeric.
        IndexError: If a non-blank line has fewer than two fields.
    """
    rows = []
    with open(filename) as file:
        # Drop the header; the default keeps an empty file from raising.
        next(file, None)
        for line in file:
            fields = line.strip().split(',')
            # ''.split(',') yields [''], which is how a blank line shows up.
            if fields == ['']:
                continue
            rows.append([int(fields[0]), float(fields[1])])
    return rows
def plot_speedup(dataset, seq_times, max_threads, axs, axs_index=None, title='Speedup vs Number of threads'):
    """Draw speedup curves and reference zones on a matplotlib axes.

    Args:
        dataset: Mapping of series name to a sequence of rows. For each row,
            ``row[0]`` is used as the x value and ``seq_times[name] / row[1]``
            as the y value.
        seq_times: Mapping of series name to the baseline time that is divided
            by each row's time.
        max_threads: Right-hand x limit; also the extent of the linear-speedup
            reference line and of the shaded zones.
        axs: The axes to draw on, or an indexable collection of axes when
            ``axs_index`` is given.
        axs_index: Optional index selecting one axes out of ``axs``.
        title: Title set on the axes.
    """
    target = axs if axs_index is None else axs[axs_index]

    # One curve per dataset entry.
    for filename, data in dataset.items():
        if 'pagerank' in filename:
            # Label with the text between the second underscore and the
            # first dot of the file name.
            second_underscore = filename.index('_', filename.index('_') + 1)
            legend_name = filename[second_underscore + 1:filename.index('.')]
        else:
            legend_name = filename
        thread_counts = [row[0] for row in data]
        speedups = [seq_times[filename] / row[1] for row in data]
        target.plot(thread_counts, speedups, label=legend_name, marker='o')

    # The y-axis upper limit is the CPU count of the machine running this
    # script, as reported by os.cpu_count().
    system_cores = os.cpu_count()

    # Diagonal y = x marks ideal (linear) speedup.
    diagonal = mlines.Line2D([0, max_threads], [0, max_threads],
                             color='red', linestyle="dashed", label="Linear speedup")
    target.add_line(diagonal)

    xs = list(range(0, max_threads + 1))
    # Band between y = 0 and y = 1.
    target.fill_between(xs, [1] * (max_threads + 1),
                        alpha=0.3, color="red", label="No improvement zone")
    # Area between y = 0 and the diagonal.
    target.fill_between(xs, xs,
                        alpha=0.1, color="blue", label="Sublinear speedup zone")
    # Area between the diagonal and y = max_threads.
    target.fill_between(xs, xs, max_threads,
                        alpha=0.1, color="green", label="Super linear speedup zone")

    target.set_xlim(0, max_threads)
    target.set_ylim(0, system_cores)
    target.set_xlabel('Number of threads')
    target.set_ylabel('Speedup')
    target.set_title(title)
    target.grid()
    target.legend()
if __name__ == '__main__':
    # x-axis extent shared by every speedup plot below.
    max_threads = 27

    # Collect the CSV files in the results folder. Other entries (hidden
    # files, notes, sub-folders) are skipped so they are never handed to the
    # CSV parser.
    folder_path = 'src'
    filenames = [f for f in os.listdir(folder_path)
                 if f.lower().endswith('.csv')
                 and os.path.isfile(os.path.join(folder_path, f))]

    # Parse every file once. Files whose name contains '_v2' belong to
    # algorithm 2 and are stored under the name with that marker removed, so
    # both algorithms share the same key for the same dataset.
    data_alg_1 = {}
    data_alg_2 = {}
    for filename in filenames:
        parsed = parse_speedup_csv(os.path.join(folder_path, filename))
        if '_v2' in filename:
            marker = filename.index('_v2')
            data_alg_2[filename[:marker] + filename[marker + 3:]] = parsed
        else:
            data_alg_1[filename] = parsed

    # sort data by name
    data_alg_1 = dict(sorted(data_alg_1.items()))
    data_alg_2 = dict(sorted(data_alg_2.items()))

    # Sequential baseline per dataset: the time in the first data row.
    seq_times_alg_1 = {name: rows[0][1] for name, rows in data_alg_1.items()}
    seq_times_alg_2 = {name: rows[0][1] for name, rows in data_alg_2.items()}

    # 2x2 grid: top row uses each algorithm's own baseline, bottom row uses
    # the faster of the two baselines.
    fig, axs = plt.subplots(2, 2, figsize=(15, 10))
    axs = [item for sublist in axs for item in sublist]
    plot_speedup(data_alg_1, seq_times_alg_1, max_threads, axs, 0, 'Algorithm 1')
    plot_speedup(data_alg_2, seq_times_alg_2, max_threads, axs, 1, 'Algorithm 2')

    # Minimum sequential time per dataset across both algorithms. Built from
    # the two baseline dicts (not from the directory listing) so a dataset
    # that only has a '_v2' file still gets an entry and does not cause a
    # KeyError when algorithm 2 is plotted.
    min_seq_times = dict(seq_times_alg_1)
    for name, seq_time in seq_times_alg_2.items():
        min_seq_times[name] = min(min_seq_times.get(name, seq_time), seq_time)

    plot_speedup(data_alg_1, min_seq_times, max_threads, axs, 2,
                 'Algorithm 1 using minimum sequential time')
    plot_speedup(data_alg_2, min_seq_times, max_threads, axs, 3,
                 'Algorithm 2 using minimum sequential time')
    # Replaces the legend of the current axes with a wide one anchored
    # below the plot.
    plt.legend(bbox_to_anchor=(-0.1, -0.15), loc='upper center', ncol=7)
    fig.suptitle('Speedup vs Number of Threads', fontsize=20)

    # Grouped bar chart of the sequential times, one group per algorithm-1
    # dataset; algorithm 2 gets a zero-height bar where it has no data.
    fig, ax = plt.subplots()
    bar_width = 0.45
    dict_seq_times = {
        'Algorithm 1': list(seq_times_alg_1.values()),
        'Algorithm 2': [seq_times_alg_2.get(name, 0) for name in seq_times_alg_1],
    }
    positions = np.arange(len(seq_times_alg_1))
    for multiplier, (attribute, measurement) in enumerate(dict_seq_times.items()):
        offset = bar_width * multiplier
        rects = ax.bar(positions + offset, measurement, bar_width, label=attribute)
        ax.bar_label(rects, padding=3)
    plt.ylabel('Time (ms)')
    plt.title('Sequential times')
    # Tick label: the part of the file name between the second underscore
    # and the first dot.
    # NOTE(review): ticks are placed at `positions + bar_width`, i.e. under
    # the centre of the second bar rather than the centre of the group
    # (bar_width / 2) - confirm this is intended.
    tick_labels = [name[name.index('_', name.index('_') + 1) + 1:name.index('.')]
                   for name in seq_times_alg_1]
    ax.set_xticks(positions + bar_width, tick_labels, rotation=20, ha='right')
    plt.ylim([0, 35000])
    plt.legend()
    plt.grid()
    # make plt bigger
    fig = plt.gcf()
    fig.set_size_inches(9, 7)
    plt.show()

    # plot difference in speedup between src and src_old folders
    data_alg_old = parse_speedup_csv('src_old/pagerank_speedup_soc_LiveJournal1.csv')
    data_alg_new = parse_speedup_csv('src/pagerank_speedup_soc_LiveJournal1.csv')
    # Both curves share the same baseline so they are directly comparable.
    shared_baseline = min_seq_times['pagerank_speedup_soc_LiveJournal1.csv']
    fig, axs = plt.subplots(1, 1, figsize=(8, 5))
    plot_speedup({'Dynamic scheduling': data_alg_new, 'Static scheduling': data_alg_old},
                 {'Dynamic scheduling': shared_baseline,
                  'Static scheduling': shared_baseline}, max_threads, axs, None,
                 'Static vs Dynamic scheduling speedup for soc_LiveJournal1 graph')
    plt.show()