Skip to content

Commit ece590a

Browse files
committed
update cache analysis
1 parent bd62078 commit ece590a

File tree

12 files changed

+677
-431
lines changed

12 files changed

+677
-431
lines changed

benchmark/basic_performance/build/utils/build_utils.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,13 @@ def load_global_env():
104104
def clean(build_dir: Path, sudo: bool = False):
105105
build_dir = build_dir.resolve()
106106
if os.path.exists(build_dir):
107-
run(f"rm -rf {build_dir}", sudo=sudo)
107+
run(f"rm -rf {build_dir}", sudo=True)
108108

109109

110110
def make_build_dir(build_dir, sudo: bool = False):
111111
build_dir = build_dir.resolve()
112112
if not os.path.exists(build_dir):
113-
run(f"mkdir -p {build_dir}", sudo=sudo)
113+
run(f"mkdir -p {build_dir}", sudo=True)
114114

115115

116116
def get_threads_num():
@@ -131,6 +131,7 @@ def run_cmake(build_dir: Path, arch: str, sudo: bool = False):
131131
raise ValueError(f"Unknown machine type: {arch}")
132132
core_num_per_socket = get_cpu_number(arch)
133133
socket_num = get_socket_number(arch)
134+
print(f"machine_type: {machine_type}, core_num_per_socket: {core_num_per_socket}, socket_num: {socket_num}")
134135
run(
135136
" ".join(
136137
[
@@ -140,16 +141,16 @@ def run_cmake(build_dir: Path, arch: str, sudo: bool = False):
140141
f"-DMAX_SOCKET_NUM={socket_num}"
141142
]
142143
),
143-
sudo=sudo,
144+
sudo=True,
144145
pty=True,
145146
)
146147
numbers = get_threads_num()
147-
run(f"cmake --build . -j{numbers}", sudo=sudo)
148+
run(f"cmake --build . -j{numbers}", sudo=True)
148149

149150

150151
def run_build(build_dir: Path, arch, sudo=False):
151152
build_dir = build_dir.resolve()
152153

153-
clean(build_dir, sudo=sudo)
154-
make_build_dir(build_dir, sudo=sudo)
155-
run_cmake(build_dir, arch, sudo=sudo)
154+
clean(build_dir, sudo=True)
155+
make_build_dir(build_dir, sudo=True)
156+
run_cmake(build_dir, arch, sudo=True)
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
disable_prefetch=True
22
boost_cpu=True
3-
dimm_physical_start_addr = 0x800000000
4-
dimm_test_size=0x840000000
3+
dimm_physical_start_addr=0x800000000 # 32GB
4+
cxl_physical_start_addr=0x4080000000 # 258GB
5+
test_size=0x840000000 # 32GB test buffer + 1GB cindex buffer
6+
socket_number=2
7+
snc_mode=1
Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
disable_prefetch=False
1+
disable_prefetch=True
22
boost_cpu=True
3-
dimm_physical_start_addr=0x200000000
4-
dimm_test_size=0x840000000
3+
dimm_physical_start_addr=0x800000000 # 32GB
4+
cxl_physical_start_addr=0x2080000000 # 130GB
5+
test_size=0x840000000 # 32GB test buffer + 1GB cindex buffer
6+
socket_number=2
7+
snc_mode=1
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
disable_prefetch=True
22
boost_cpu=True
3-
dimm_physical_start_addr=0x800000000
4-
dimm_test_size=0x840000000
3+
dimm_physical_start_addr=0x800000000 # 32GB
4+
cxl_physical_start_addr=0x4080000000 # 258GB
5+
test_size=0x840000000 # 32GB test buffer + 1GB cindex buffer
6+
socket_number=2
7+
snc_mode=1
Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
job_id: 200 # enum class JobId : uint32_t @ src/core/system_define.h
2-
repeat: [32]
3-
core_id: [0]
4-
node_id: [0]
5-
use_flush: [0]
6-
access_order: [0]
7-
stride_size_array: [0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000,
8-
0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000,
9-
0x100000, 0x200000, 0x400000, 0x800000, 0x1000000,
10-
0x2000000, 0x4000000] # 64B ~ 64MB
11-
#stride_size_array: [0x40] # 64B
12-
block_num_array: [0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200,
13-
0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000,
14-
0x20000, 0x40000, 0x80000, 0x100000] # 1 ~ 1M
15-
#block_num_array: [0x1] # 1
16-
test_size_array: [0x800000000] # 32GB
17-
test_idx_array: [0x0]
2+
test_type: 0 # 0: measure access latency, 1: measure flush latency
3+
repeat: 32
4+
use_flush: 1 # 0: no flush, 1: use flush after one round of access
5+
core_id: [0, 20] # two cores on the different sockets
6+
node_id: [2] # access memory on node_id
7+
flush_type: [0] # 0: clflush, 1: clflushopt, 2: clwb
8+
ldst_type: [0] # 0: temporal # TODO: add support for 1: non-temporal, 2: atomic
9+
access_order: [0] # 0: random, 1: sequential
10+
stride_size_array: [0x40, 0x80, 0x100, 0x200,
11+
0x400, 0x800, 0x1000, 0x2000,
12+
0x4000, 0x8000, 0x10000, 0x20000,
13+
0x40000, 0x80000, 0x100000, 0x200000,
14+
0x400000, 0x800000, 0x1000000, 0x2000000,
15+
0x4000000] # 64B to 64MB
16+
# stride_size_array: [0x40]
17+
block_num_array: [0x1, 0x2, 0x4, 0x8,
18+
0x10, 0x20, 0x40, 0x80,
19+
0x100, 0x200, 0x400, 0x800,
20+
0x1000, 0x2000, 0x4000, 0x8000,
21+
0x10000, 0x20000, 0x40000, 0x80000,
22+
0x100000] # 1 to 1M blocks
23+
# block_num_array: [0x1]

benchmark/basic_performance/scripts/parse/cache_parser.py renamed to benchmark/basic_performance/scripts/plot/heatmap_plot.py

Lines changed: 80 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#
22
# MIT License
33
#
4-
# Copyright (c) 2025 Jangseon Park
4+
# Copyright (c) 2025 Jangseon Park, Luyi Li
55
# Affiliation: University of California San Diego CSE
6-
# Email: jap036@ucsd.edu
6+
Email: jap036@ucsd.edu, lul014@ucsd.edu
77
#
88
# Permission is hereby granted, free of charge, to any person obtaining a copy
99
# of this software and associated documentation files (the "Software"), to deal
@@ -36,7 +36,7 @@
3636

3737
# Function to read log data from a file
3838
def read_log_file(file_path):
39-
with open(file_path) as file:
39+
with open(file_path, "r") as file:
4040
return file.read()
4141

4242

@@ -48,36 +48,58 @@ def parse_pattern(base_dir):
4848
store_latency_ns_list = []
4949
load_latency_cycle_list = []
5050
load_latency_ns_list = []
51+
access_order_list = []
52+
snc_mode_list = []
53+
core_id_list = []
54+
node_id_list = []
55+
ldst_type_list = []
5156

5257
index = 0
5358
for root, _, files in os.walk(base_dir):
5459
if "result.log" in files:
5560
log_path = os.path.join(root, "result.log")
56-
with open(log_path) as f:
61+
with open(log_path, "r") as f:
5762
content = f.read()
5863
test_info = re.search(
59-
r"==========Test No.==========\n"
60-
r"Number of block: (\d+)\n"
61-
r"Stride Size: (\d+)\n"
62-
r"Average store time:\s+(\d+)\s+cycles,\s+([\d\.]+)\s+ns\n"
63-
r"Average load time:\s+(\d+)\s+cycles,\s+([\d\.]+)\s+ns",
64+
r"=============== Test Information ===============.*?"
65+
r"Number of Block:\s+(\d+).*?"
66+
r"Stride Size:\s+(\d+).*?"
67+
r"SNC Mode:\s+(\d+).*?"
68+
r"Core ID:\s+(\d+).*?"
69+
r"Node ID:\s+(\d+).*?"
70+
r"Access Order:\s+(\w+).*?"
71+
r"Load/Store Type:\s+(\w+).*?"
72+
r"=============== Test Results ===============.*?"
73+
r"Average Store Latency:\s+(\d+)\s+cycles,\s+([\d.]+)\s+ns.*?"
74+
r"Average Load Latency:\s+(\d+)\s+cycles,\s+([\d.]+)\s+ns",
6475
content,
76+
re.DOTALL # Use DOTALL instead of MULTILINE to match across lines
6577
)
6678
if test_info:
6779
(
6880
block_num,
6981
stride_size,
82+
snc_mode,
83+
core_id,
84+
node_id,
85+
access_order,
86+
ldst_type,
7087
store_lat_cycle,
7188
store_lat_ns,
7289
load_lat_cycle,
7390
load_lat_ns,
7491
) = test_info.groups()
75-
92+
7693
logger.info(
7794
", ".join(
7895
[
7996
f"Block Num: {block_num}",
8097
f"Stride Size: {stride_size}",
98+
f"SNC Mode: {snc_mode}",
99+
f"Core ID: {core_id}",
100+
f"Node ID: {node_id}",
101+
f"Access Order: {access_order}",
102+
f"Load/Store Type: {ldst_type}",
81103
f"Store Latency Cycle: {store_lat_cycle}",
82104
f"Store Latency ns: {store_lat_ns}",
83105
f"Load Latency Cycle: {load_lat_cycle}",
@@ -89,6 +111,11 @@ def parse_pattern(base_dir):
89111
test_index.append(index)
90112
block_result.append(int(block_num))
91113
stride_result.append(int(stride_size))
114+
snc_mode_list.append(int(snc_mode))
115+
core_id_list.append(int(core_id))
116+
node_id_list.append(int(node_id))
117+
access_order_list.append(access_order)
118+
ldst_type_list.append(ldst_type)
92119
store_latency_cycle_list.append(int(store_lat_cycle))
93120
store_latency_ns_list.append(float(store_lat_ns))
94121
load_latency_cycle_list.append(int(load_lat_cycle))
@@ -99,6 +126,11 @@ def parse_pattern(base_dir):
99126
test_index,
100127
block_result,
101128
stride_result,
129+
snc_mode_list,
130+
core_id_list,
131+
node_id_list,
132+
access_order_list,
133+
ldst_type_list,
102134
store_latency_cycle_list,
103135
store_latency_ns_list,
104136
load_latency_cycle_list,
@@ -112,6 +144,11 @@ def parse_results(log_file_path):
112144
test_index,
113145
block_num,
114146
stride_size,
147+
snc_mode,
148+
core_id,
149+
node_id,
150+
access_order,
151+
ldst_type,
115152
store_latency_cycle,
116153
store_latency_ns,
117154
load_latency_cycle,
@@ -120,40 +157,18 @@ def parse_results(log_file_path):
120157

121158
# Initialize an empty list to store the parsed data
122159
data = []
123-
124-
# Regex patterns to capture relevant information
125-
# test_pattern = r"==========Test No\.(\d+)=========="
126-
# block_pattern = r"Number of block: (\d+)"
127-
# stride_pattern = r"Stride Size: (\d+)"
128-
# store_pattern = r"Average store time:\s+(\d+)\s+cycles,\s+([\d\.]+)\s+ns"
129-
# load_pattern = r"Average load time:\s+(\d+)\s+cycles,\s+([\d\.]+)\s+ns"
130-
131-
# Parse the log data
132-
# test_index = re.findall(test_pattern, log_data)
133-
# block_num = re.findall(block_pattern, log_data)
134-
# stride_size = re.findall(stride_pattern, log_data)
135-
# store_results = re.findall(store_pattern, log_data)
136-
# load_results = re.findall(load_pattern, log_data)
137-
138-
# store_latency_cycle = []
139-
# store_latency_ns = []
140-
# for store_result in store_results:
141-
# store_latency_cycle.append(store_result[0])
142-
# store_latency_ns.append(store_result[1])
143-
144-
# load_latency_cycle = []
145-
# load_latency_ns = []
146-
# for load_result in load_results:
147-
# load_latency_cycle.append(load_result[0])
148-
# load_latency_ns.append(load_result[1])
149-
150160
# Store the parsed data in a list
151161
for i in range(len(test_index)):
152162
data.append(
153163
{
154164
"test_index": int(test_index[i]),
155165
"block_num": int(block_num[i]),
156166
"stride_size": int(stride_size[i]),
167+
"snc_mode": int(snc_mode[i]),
168+
"core_id": int(core_id[i]),
169+
"node_id": int(node_id[i]),
170+
"access_order": access_order[i],
171+
"ldst_type": ldst_type[i],
157172
"store_latency_cycle": int(store_latency_cycle[i]),
158173
"load_latency_cycle": int(load_latency_cycle[i]),
159174
"store_latency_ns": int(store_latency_ns[i]),
@@ -168,14 +183,14 @@ def parse_results(log_file_path):
168183

169184
def format_label(value):
170185
if value >= 2**20:
171-
return f"{int(value / 2**20)}M"
186+
return f"{int(value / 2 ** 20)}M"
172187
elif value >= 2**10:
173-
return f"{int(value / 2**10)}K"
188+
return f"{int(value / 2 ** 10)}K"
174189
else:
175190
return str(value)
176191

177192

178-
def plot_heatmap(df, access_op, base_dir):
193+
def plot_heatmap(base_dir, df, access_op, snc_mode, core_id, node_id, access_order, ldst_type):
179194
if access_op == "load":
180195
pivot_table_value = "load_latency_ns"
181196
elif access_op == "store":
@@ -197,22 +212,41 @@ def plot_heatmap(df, access_op, base_dir):
197212
pivot_table = pivot_table.iloc[::-1]
198213

199214
plt.figure(figsize=(10, 10))
200-
ax = sns.heatmap(pivot_table, annot=False, cmap="plasma")
215+
ax = sns.heatmap(pivot_table, annot=False, cmap="plasma", vmin=0, vmax=800)
201216

202217
ax.set_xticks(np.arange(len(x_ticks)))
203218
ax.set_xticklabels([format_label(tick) for tick in x_ticks])
204219
ax.set_yticks(np.arange(len(y_ticks)))
205220
ax.set_yticklabels([format_label(tick) for tick in reversed(y_ticks)])
206221

207-
plt.title(f"heatmap_{access_op}")
222+
# plt.title(f"heatmap_{access_op}")
208223
plt.xlabel("Stride Size (Byte)")
209224
plt.ylabel("Number of Blocks")
210-
plt.savefig(f"{base_dir}/heatmap_{access_op}.pdf")
225+
plt.savefig(f"{base_dir}/heatmap_{access_op}_snc{snc_mode}_core{core_id}_node{node_id}_{access_order}_{ldst_type}.pdf")
211226
plt.close()
212227

213228

214-
def parse_and_plot(base_dir):
229+
def parse_plot_heatmap(base_dir):
215230
results_df = parse_results(base_dir)
216231
results_df.to_csv(f"{base_dir}/results.csv", index=False)
217-
plot_heatmap(results_df, "load", base_dir)
218-
plot_heatmap(results_df, "store", base_dir)
232+
snc_modes = results_df['snc_mode'].unique()
233+
core_ids = results_df['core_id'].unique()
234+
node_ids = results_df['node_id'].unique()
235+
access_orders = results_df['access_order'].unique()
236+
ldst_types = results_df['ldst_type'].unique()
237+
for snc_mode in snc_modes:
238+
for core_id in core_ids:
239+
for node_id in node_ids:
240+
for access_order in access_orders:
241+
for ldst_type in ldst_types:
242+
filtered_df = results_df[
243+
(results_df["snc_mode"] == snc_mode)
244+
& (results_df["core_id"] == core_id)
245+
& (results_df["node_id"] == node_id)
246+
& (results_df["access_order"] == access_order)
247+
& (results_df["ldst_type"] == ldst_type)
248+
]
249+
if not filtered_df.empty:
250+
plot_heatmap(base_dir, filtered_df, "load", snc_mode, core_id, node_id, access_order, ldst_type)
251+
plot_heatmap(base_dir, filtered_df, "store", snc_mode, core_id, node_id, access_order, ldst_type)
252+

0 commit comments

Comments
 (0)