Skip to content

Commit e633e2b

Browse files
committed
Merge time reports CSV on lib
1 parent 329b384 commit e633e2b

File tree

2 files changed

+26
-17
lines changed

2 files changed

+26
-17
lines changed

makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ RUN_OMP = ./$(EXEC) -n $(processes) $(arguments_$(dataset))
8080
RUN_MPI = mpiexec -n $(processes) --oversubscribe ./$(EXEC) $(arguments_$(dataset))
8181

8282
RESULTS_FILE = csv/minhash_$(whichmp)_$(dataset)_$(processes).csv
83-
TIME_FILE = csv/time_$(whichmp)_$(dataset).csv
83+
TIME_FILE = csv/time_$(dataset).csv
8484

8585
# Compile targets
8686
$(EXEC): $(OBJS)

src/graph.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,12 @@ def draw_graph(
5454

5555
# Plot the data
5656
for name, df in dists.items():
57-
plt.plot(df[x_label], df[y_label], label=name, marker='o', markersize=3)
57+
if len(df) > 1:
58+
# Proper distribution, plot it
59+
plt.plot(df[x_label], df[y_label], label=name, marker='o', markersize=3)
60+
elif len(df) == 1:
61+
# Single point, plot it as a line
62+
plt.axline((0, df[y_label].values[0]), slope=0, label=name)
5863

5964
# Add labels
6065
plt.title(title)
@@ -77,9 +82,10 @@ def draw_graph(
7782
plt.show()
7883

7984

80-
def get_dataframe(csv_path: str) -> pandas.DataFrame:
85+
def get_time_dataframes(csv_path: str) -> Dict[str, pandas.DataFrame]:
8186
"""
82-
Reads a CSV file and returns its content as a DataFrame.
87+
Reads a CSV time report file and returns its content as a dict {lib:DataFrame}.
88+
The times are averaged for each n_processes.
8389
8490
Args:
8591
csv_path: Path to the CSV file.
@@ -89,31 +95,30 @@ def get_dataframe(csv_path: str) -> pandas.DataFrame:
8995
"""
9096

9197
# Read the CSV file
92-
df = pandas.read_csv(csv_path, usecols=["n_processes", "time_elapsed"])
98+
df = pandas.read_csv(csv_path, usecols=["lib", "n_processes", "time_elapsed"])
9399

94100
# Remove the last letter from the "time_elapsed" column and convert it to float
95101
df["time_elapsed"] = df["time_elapsed"].str[:-1].astype(float)
96102

97103
# Average the time for each n_processes
98-
df = df.groupby("n_processes").mean().reset_index()
104+
df = df.groupby(["lib", "n_processes"]).mean().reset_index()
105+
106+
# Divide the data into separate DataFrames
107+
dists = {
108+
lib: group_df
109+
for lib, group_df in df.groupby("lib")
110+
}
99111

100112
# Return the DataFrame
101-
return df
113+
return dists
102114

103115

104-
if __name__ == "__main__":
116+
def main():
105117
# Parse the command-line arguments
106118
args = parser.parse_args()
107119

108120
# Read the CSV files
109-
csv_paths = {
110-
lib: f"{args.in_csv_path}/time_{lib}_{args.dataset}.csv"
111-
for lib in ["MPI", "OMP"]
112-
}
113-
data = {
114-
lib: get_dataframe(csv_path)
115-
for lib, csv_path in csv_paths.items()
116-
}
121+
data = get_time_dataframes(f"{args.in_csv_path}/time_{args.dataset}.csv")
117122

118123
# Compute save path
119124
save_path = f"{args.out_png_path}/time_{args.dataset}.svg" if args.out_png_path else None
@@ -123,6 +128,10 @@ def get_dataframe(csv_path: str) -> pandas.DataFrame:
123128
dists=data,
124129
x_label="n_processes",
125130
y_label="time_elapsed",
126-
title="Execution time",
131+
title=f"Execution time ({args.dataset})",
127132
save_path=save_path
128133
)
134+
135+
136+
if __name__ == "__main__":
137+
main()

0 commit comments

Comments
 (0)