-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplots.py
More file actions
258 lines (204 loc) · 12.3 KB
/
plots.py
File metadata and controls
258 lines (204 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 1 16:33:39 2024
@author: vamsi
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
plotsdir = "/home/vamsi/src/phd/writings/rdcn-throughput/master/plots/"
directory="/home/vamsi/src/phd/codebase/rdcn-throughput/"
#%%
plt.rcParams.update({'font.size': 14})
# Load the updated dataset
df_updated = pd.read_csv(directory+'dump/throughput.csv')
###################################################
# First, filter entries for both 'demand-aware-static' and 'static' networkTypes
demand_aware_static = df_updated[df_updated['networkType'] == 'demand-aware-static']
static = df_updated[df_updated['networkType'] == 'static']
# Prepare to update the throughput values for 'demand-aware-static' with the maximum between
# 'demand-aware-static' and 'static' for the corresponding degree and matrix
# This involves merging the two filtered dataframes on 'degree' and 'matrix', then comparing throughput values
# Merge on 'degree' and 'matrix'
merged = pd.merge(demand_aware_static, static, on=['degree', 'matrix'], suffixes=('_das', '_static'))
# Calculate the maximum throughput value between 'demand-aware-static' and 'static' for each pair
merged['throughput_max'] = merged[['throughput_das', 'throughput_static']].max(axis=1)
# Update the 'demand-aware-static' entries in the original dataframe with these maximum throughput values
for index, row in merged.iterrows():
df_updated.loc[(df_updated['networkType'] == 'demand-aware-static') & (df_updated['degree'] == row['degree']) & (df_updated['matrix'] == row['matrix']), 'throughput'] = row['throughput_max']
###################################################
for degree in [16, 14, 12, 10, 8, 6, 4]:
# Filter the dataset for entries where degree is 4
df_updated_degree = df_updated[df_updated['degree'] == degree]
# Mapping of matrix names to updated names for clarity
matrices = [
"chessboard-16", "uniform-16", "permutation-16", "skew-16-0.0", "skew-16-0.1", "skew-16-0.2",
"skew-16-0.3", "skew-16-0.4", "skew-16-0.5", "skew-16-0.6", "skew-16-0.7", "skew-16-0.8",
"skew-16-0.9", "skew-16-1.0", "data-parallelism", "hybrid-parallelism", "heatmap2", "heatmap3"
]
updated_names = [
"Chessboard", "Uniform", "Permutation", "U+P 0", "U+P 0.1", "U+P 0.2", "U+P 0.3", "U+P 0.4",
"U+P 0.5", "U+P 0.6", "U+P 0.7", "U+P 0.8", "U+P 0.9", "U+P 1.0", "Data parallelism",
"Hybrid parallelism", "DLRM +3 perm", "DLRM +7 perm"
]
######
matrix_mapping = dict(zip(matrices, updated_names))
df_updated_degree['matrix'] = df_updated_degree['matrix'].map(matrix_mapping)
# excluding U+P=0, this is same as uniform
df_updated_degree = df_updated_degree[df_updated_degree['matrix'] != 'U+P 0']
# excluding U+P=1, this is same as permutation
df_updated_degree = df_updated_degree[df_updated_degree['matrix'] != 'U+P 1.0']
custom_order = [
"Chessboard", "Uniform", "U+P 0.1", "U+P 0.2", "U+P 0.3", "U+P 0.4",
"U+P 0.5", "U+P 0.6", "U+P 0.7", "U+P 0.8", "U+P 0.9", "Permutation", "Data parallelism",
"Hybrid parallelism", "DLRM +3 perm", "DLRM +7 perm"
]
# Ensure the dataset is ordered according to the custom order
# This step involves mapping the custom order to an orderable list (like integers) that pandas can sort by
order_mapping = {matrix: i for i, matrix in enumerate(custom_order)}
df_updated_degree['order'] = df_updated_degree['matrix'].map(order_mapping)
# Now sort by this order
df_updated_degree = df_updated_degree.sort_values(by='order')
# Unique matrices and network types for plotting
unique_matrices = df_updated_degree['matrix'].unique()
unique_network_types = ["static","demand-aware-static","oblivious","demand-aware-periodic"]
network_type_labels={}
network_type_labels["static"]="Static (not reconfigurable)"
network_type_labels["demand-aware-static"]="Demand-aware static (one-shot reconfigurable)"
network_type_labels["oblivious"]="Demand-oblivious (reconfigurable)"
network_type_labels["demand-aware-periodic"]="Demand-aware periodic (reconfigurable)"
# Manual color selection for bars
colors = ['#7dcdf5', '#d7f57d', '#e87d5f', '#5fe87f', 'sandybrown', 'lightcoral', 'grey', 'gold']
# Hatches for further distinction among bars
hatches = ['/', '\\', '-', 'x', '+', '|', 'o', 'O', '.', '*']
# Setting up the figure
fig, ax = plt.subplots(figsize=(12, 5))
# Bar width and group settings for spacing
bar_width = 0.3
group_width = bar_width * len(unique_network_types) + (bar_width * 0.5) # Original spacing
group_spacing = bar_width * 1.5 # Additional spacing between groups
group_positions = [i * (group_width + group_spacing) for i in range(len(unique_matrices))]
# Plotting bars with adjusted settings
for i, network_type in enumerate(unique_network_types):
positions = [x + (bar_width * i) for x in group_positions]
throughput_values = [
df_updated_degree[(df_updated_degree['matrix'] == matrix) & (df_updated_degree['networkType'] == network_type)]['throughput'].values[0]
if df_updated_degree[(df_updated_degree['matrix'] == matrix) & (df_updated_degree['networkType'] == network_type)].shape[0] > 0
else 0
for matrix in unique_matrices
]
ax.bar(positions, throughput_values, color=colors[i % len(colors)], width=bar_width, edgecolor='black', label=network_type_labels[network_type], hatch=hatches[i % len(hatches)],alpha=0.6)
ax.scatter(positions[np.argmin(throughput_values)], throughput_values[np.argmin(throughput_values)], marker = "*", s=200, c=colors[i % len(colors)],edgecolors='black')
indexpos = throughput_values.index(sorted(throughput_values)[1])
secondmin = sorted(throughput_values)[1]
ax.scatter(positions[indexpos], secondmin, marker = "*", s=200, c=colors[i % len(colors)],edgecolors='black')
# Final plot adjustments for aesthetics
if degree==4 or degree==16:
ax.set_xlabel('Demand Matrix')
ax.set_ylabel('Throughput')
ax.set_ylim(0.2,1.1)
ax.set_xticks([r + (group_width + group_spacing)/2 - bar_width/2 for r in group_positions])
ax.set_xticklabels(unique_matrices, rotation=25, ha="right")
if degree==4 or degree==8:
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.2), ncol=2,framealpha=1)
ax.yaxis.grid(True, linestyle='--')
ax.set_xlim([group_positions[0] - bar_width*2, group_positions[-1] + bar_width * 5])
# if degree!=4:
# ax.text(0.5, 0.85, 'Degree = '+str(degree), ha='center', va='bottom', transform=ax.transAxes)
plt.tight_layout()
plt.show()
fig.savefig(plotsdir+'degree-'+str(degree)+'.pdf')
#%%
plt.rcParams.update({'font.size': 18})
# Load the dataset
df_updated = pd.read_csv(directory+'dump/throughput.csv')
###################################################
# First, filter entries for both 'demand-aware-static' and 'static' networkTypes
demand_aware_static = df_updated[df_updated['networkType'] == 'demand-aware-static']
static = df_updated[df_updated['networkType'] == 'static']
# Prepare to update the throughput values for 'demand-aware-static' with the maximum between
# 'demand-aware-static' and 'static' for the corresponding degree and matrix
# This involves merging the two filtered dataframes on 'degree' and 'matrix', then comparing throughput values
# Merge on 'degree' and 'matrix'
merged = pd.merge(demand_aware_static, static, on=['degree', 'matrix'], suffixes=('_das', '_static'))
# Calculate the maximum throughput value between 'demand-aware-static' and 'static' for each pair
merged['throughput_max'] = merged[['throughput_das', 'throughput_static']].max(axis=1)
# Update the 'demand-aware-static' entries in the original dataframe with these maximum throughput values
for index, row in merged.iterrows():
df_updated.loc[(df_updated['networkType'] == 'demand-aware-static') & (df_updated['degree'] == row['degree']) & (df_updated['matrix'] == row['matrix']), 'throughput'] = row['throughput_max']
###################################################
# Group by 'degree' and 'networkType', then calculate the minimum 'throughput' for each group
min_throughput = df_updated.groupby(['degree', 'networkType'])['throughput'].min().reset_index()
# Define distinct markers and colors for the plot
markers = ['o', '^', 's', 'D', 'v', '<', '>', 'p', '*', 'h', 'x']
colors = ['#88b7db', '#dbd788', '#c27676', '#7dc276', 'sandybrown', 'lightcoral', 'grey', 'gold']
# Plotting
fig, ax = plt.subplots(figsize=(8,6))
# Unique network types for plotting
network_types = min_throughput['networkType'].unique()
network_types=["static","demand-aware-static","oblivious","demand-aware-periodic"]
network_type_labels={}
network_type_labels["static"]="Static"
network_type_labels["demand-aware-static"]="Demand-aware static"
network_type_labels["oblivious"]="Demand-oblivious"
network_type_labels["demand-aware-periodic"]="Demand-aware periodic"
for i, network_type in enumerate(network_types):
# Ensure cycling through colors and markers for different network types
color = colors[i % len(colors)]
marker = markers[i % len(markers)]
# Filter data for the current network type
data = min_throughput[min_throughput['networkType'] == network_type]
# Plot with customizations
ax.plot(data['degree'], data['throughput'], marker=marker, markersize=20, linewidth=4, color=color, label=network_type_labels[network_type])
# Final plot adjustments
ax.set_xlabel('Degree')
ax.set_ylabel('Throughput (worst-case)')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=2,framealpha=0)
ax.xaxis.grid(True,ls='--')
ax.yaxis.grid(True,ls='--')
ax.set_ylim(0.2,1)
y_start = min_throughput[(min_throughput['networkType'] == 'oblivious') & (min_throughput['degree'] == 4)]['throughput'].values[0]
y_end = min_throughput[(min_throughput['networkType'] == 'demand-aware-periodic') & (min_throughput['degree'] == 4)]['throughput'].values[0]-0.1
# Add arrow
ax.annotate('', xy=(14, y_end), xytext=(14, y_start),
arrowprops=dict(arrowstyle="-|>", color='black',lw=3))
# Add text for "30% improvement"
ax.text(16.5, y_end, '30% improvement', horizontalalignment='right',fontstyle='italic')
plt.show()
fig.tight_layout()
fig.savefig(plotsdir+'min-throughput.pdf')
# df_updated.to_csv(directory+"dump/throughput-updated.csv")
#%%
# Filter the dataset for entries with degree = 4
df_degree_4 = df_updated[df_updated['degree'] == 4]
# Prepare a list to store the improvement results
improvement_results = []
# Iterate over each matrix to calculate the percentage improvement and improvement factor
for matrix in df_degree_4['matrix'].unique():
# Filter for 'demand-aware-periodic' throughput at the current matrix and degree
dap_throughput = df_degree_4[(df_degree_4['matrix'] == matrix) & (df_degree_4['networkType'] == 'demand-aware-periodic')]['throughput'].values
if dap_throughput.size > 0:
dap_throughput = dap_throughput[0]
# Compare against each other network type
for network_type in df_degree_4['networkType'].unique():
if network_type != 'demand-aware-periodic':
other_throughput = df_degree_4[(df_degree_4['matrix'] == matrix) & (df_degree_4['networkType'] == network_type)]['throughput'].values
if other_throughput.size > 0:
other_throughput = other_throughput[0]
# Calculate percentage improvement and improvement factor
percentage_improvement = ((dap_throughput - other_throughput) / other_throughput) * 100
improvement_factor = 1 + (percentage_improvement / 100)
# Store the results
improvement_results.append({
'Matrix': matrix,
'Compared Network Type': network_type,
'Percentage Improvement': percentage_improvement,
'Improvement Factor': improvement_factor
})
# Convert the results list to a DataFrame for easier viewing
df_improvement_results = pd.DataFrame(improvement_results)
# Display the first few rows of the improvement results
# print(df_improvement_results.head())
df_improvement_results.to_csv(directory+'dump/comparisons.csv')