Skip to content

Commit 3ad67da

Browse files
committed
Add itlb_size test
1 parent 4cbc30f commit 3ad67da

File tree

4 files changed

+215
-0
lines changed

4 files changed

+215
-0
lines changed

meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ progs = [
7171
['elimination', false, true, true],
7272
['find_branch_misses_pmu', false, false, true],
7373
['ghr_size', false, true, true],
74+
['itlb_size', false, true, true],
7475
['phr_branch_bits_location', true, false, true],
7576
['phr_size', false, true, true],
7677
['phr_target_bits_location', true, false, true],

src/itlb_size.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include <assert.h>
2+
#include <stdio.h>
3+
#include <stdlib.h>
4+
#include <unistd.h>
5+
6+
// Benchmark entry point; receives the stream opened on itlb_size.csv below.
extern void itlb_size(FILE *fp);
7+
// Set to true by the -h option below.
extern bool avoid_hugepage_merging;
8+
// Overwritten by the -s option below (the usage text calls it the branch
// address stride).
extern int stride;
9+
// Overwritten by the -f option below (the usage text calls it the fake page
// size).
extern int fake_page_size;
10+
int main(int argc, char *argv[]) {
11+
int opt;
12+
while ((opt = getopt(argc, argv, "hs:f:")) != -1) {
13+
switch (opt) {
14+
case 'h':
15+
avoid_hugepage_merging = true;
16+
break;
17+
case 's':
18+
sscanf(optarg, "%d", &stride);
19+
break;
20+
case 'f':
21+
sscanf(optarg, "%d", &fake_page_size);
22+
break;
23+
default:
24+
fprintf(stderr, "Usage: %s [-h] [-s stride] [-f page_size]\n", argv[0]);
25+
fprintf(stderr, "\t-h: avoid huge page merging\n");
26+
fprintf(stderr, "\t-s stride: set branch address stride\n");
27+
fprintf(stderr, "\t-f page_size: fake page size\n");
28+
exit(EXIT_FAILURE);
29+
}
30+
}
31+
32+
FILE *fp = fopen("itlb_size.csv", "w");
33+
assert(fp);
34+
itlb_size(fp);
35+
printf("Results are written to itlb_size.csv\n");
36+
return 0;
37+
}

src/itlb_size_gen.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#include <cstdio>
2+
3+
// Generator stub for the itlb_size benchmark.
//
// The benchmark builds its code at run time ("jit only"), so this program
// only creates (or truncates) an empty file at the path given in argv[1].
// Returns 0 on success and 1 when the path is missing or cannot be opened.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    fprintf(stderr, "Usage: %s <output file>\n",
            argc > 0 ? argv[0] : "itlb_size_gen");
    return 1;
  }
  FILE *fp = fopen(argv[1], "w");
  // fclose(NULL) is undefined, so bail out before reaching it.
  if (!fp) {
    perror(argv[1]);
    return 1;
  }
  // jit only
  fclose(fp);
  return 0;
}

src/itlb_size_lib.cpp

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
#include "include/jit.h"
2+
#include "include/utils.h"
3+
#include <assert.h>
4+
#include <cstdint>
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <unistd.h>
8+
#include <vector>
9+
10+
// Type of the generated entry point that itlb_size() calls.
// args: loop count
11+
typedef void (*gadget)(size_t);
12+
13+
// When true, itlb_size() creates one jit object per page instead of a single
// jit object covering all pages.
bool avoid_hugepage_merging = false;
14+
// Multiplied by the page index (modulo the page size) to pick the offset of
// the branch inside each page.
int stride = 64;
15+
// Replaces the getpagesize() result in itlb_size(); -1 means no override.
int fake_page_size = -1;
16+
17+
void itlb_size(FILE *fp) {
18+
int loop_count = 1000;
19+
uint64_t min_size = 1;
20+
uint64_t max_size = 4096;
21+
bind_to_core();
22+
setup_perf_cycles();
23+
24+
size_t page_size = getpagesize();
25+
if (fake_page_size != -1) {
26+
page_size = fake_page_size;
27+
}
28+
printf("Page Size: %ld\n", page_size);
29+
30+
if (avoid_hugepage_merging) {
31+
printf("Avoiding hugepage merging\n");
32+
printf("Please disable THP via: echo never "
33+
">/sys/kernel/mm/transparent_hugepage/enabled\n");
34+
}
35+
printf("Branch stride: %dB\n", stride);
36+
37+
fprintf(fp, "size,min,avg,max\n");
38+
for (uint64_t size = min_size; size <= max_size; size++) {
39+
gadget entry = NULL;
40+
jit *jit_main = NULL;
41+
std::vector<jit *> jit_pages;
42+
if (avoid_hugepage_merging) {
43+
// create each page separately
44+
// generate addresses
45+
std::vector<size_t> page_addrs;
46+
std::vector<size_t> addrs;
47+
size_t start_addr = 0x100000000;
48+
addrs.push_back(start_addr);
49+
page_addrs.push_back(start_addr);
50+
for (uint64_t i = 1; i < size; i++) {
51+
size_t addr = page_addrs[page_addrs.size() - 1];
52+
addr += page_size;
53+
// span over multiple cachelines to avoid hitting icache capacity
54+
page_addrs.push_back(addr);
55+
addrs.push_back(addr + (i * stride) % page_size);
56+
}
57+
uint8_t *start = (uint8_t *)start_addr;
58+
for (uint64_t i = 0; i < size; i++) {
59+
jit *jit_page = new jit((void *)page_addrs[i], page_size);
60+
61+
uint8_t *begin = (uint8_t *)addrs[i];
62+
uint8_t *target = (uint8_t *)addrs[(i + 1) % addrs.size()];
63+
jit_page->set_cur(begin);
64+
if (i < size - 1) {
65+
#if defined(HOST_AARCH64)
66+
jit_page->b(target);
67+
#elif defined(HOST_AMD64)
68+
jit_page->jmp5(target);
69+
#endif
70+
} else {
71+
#if defined(HOST_AARCH64)
72+
// subs x0, x0, #1
73+
jit_page->subs64(0, 0, 1);
74+
uint8_t *end = jit_page->get_cur() + 8;
75+
// cbnz has limited imm range
76+
jit_page->cbz32(0, end);
77+
jit_page->b(start);
78+
// end:
79+
jit_page->ret();
80+
#elif defined(HOST_AMD64)
81+
jit_page->dec_r32(jit::DI);
82+
jit_page->jnz6(start);
83+
jit_page->ret();
84+
#endif
85+
}
86+
jit_page->protect();
87+
jit_pages.push_back(jit_page);
88+
}
89+
entry = (gadget)start;
90+
} else {
91+
size_t mapped_size = page_size * size;
92+
mapped_size = (mapped_size + 0x10000) & -0x10000;
93+
uint8_t *start = (uint8_t *)0x100000000;
94+
jit_main = new jit(start, mapped_size);
95+
for (uint64_t i = 0; i < size; i++) {
96+
// span over multiple cachelines to avoid hitting icache capacity
97+
uint8_t *begin = start + i * page_size + (i * stride) % page_size;
98+
uint8_t *target =
99+
start + (i + 1) * page_size + ((i + 1) * stride) % page_size;
100+
jit_main->set_cur(begin);
101+
if (i < size - 1) {
102+
#if defined(HOST_AARCH64)
103+
jit_main->b(target);
104+
#elif defined(HOST_AMD64)
105+
jit_main->jmp5(target);
106+
#endif
107+
} else {
108+
#if defined(HOST_AARCH64)
109+
// subs x0, x0, #1
110+
jit_main->subs64(0, 0, 1);
111+
uint8_t *end = jit_main->get_cur() + 8;
112+
// cbnz has limited imm range
113+
jit_main->cbz32(0, end);
114+
jit_main->b(start);
115+
// end:
116+
jit_main->ret();
117+
#elif defined(HOST_AMD64)
118+
jit_main->dec_r32(jit::DI);
119+
jit_main->jnz6(start);
120+
jit_main->ret();
121+
#endif
122+
}
123+
}
124+
jit_main->protect();
125+
// jit_main->dump();
126+
entry = (gadget)start;
127+
}
128+
129+
std::vector<double> history;
130+
int iterations = 30;
131+
history.reserve(iterations);
132+
133+
double sum = 0;
134+
// run several times
135+
for (int i = 0; i < iterations; i++) {
136+
uint64_t begin = perf_read_cycles();
137+
entry(loop_count);
138+
uint64_t elapsed = perf_read_cycles() - begin;
139+
140+
// skip warmup
141+
if (i >= 10) {
142+
double time = (double)elapsed / loop_count / size;
143+
history.push_back(time);
144+
sum += time;
145+
}
146+
}
147+
148+
if (jit_main) {
149+
delete jit_main;
150+
}
151+
for (jit *page : jit_pages) {
152+
delete page;
153+
}
154+
155+
double min = history[0];
156+
double max = history[0];
157+
for (size_t i = 0; i < history.size(); i++) {
158+
if (min > history[i]) {
159+
min = history[i];
160+
}
161+
if (max < history[i]) {
162+
max = history[i];
163+
}
164+
}
165+
fprintf(fp, "%ld,%.2lf,%.2lf,%.2lf\n", size, min, sum / history.size(),
166+
max);
167+
fflush(fp);
168+
}
169+
}

0 commit comments

Comments
 (0)