|
| 1 | +#include "include/jit.h" |
| 2 | +#include "include/utils.h" |
| 3 | +#include <assert.h> |
| 4 | +#include <cstdint> |
| 5 | +#include <stdio.h> |
| 6 | +#include <stdlib.h> |
| 7 | +#include <unistd.h> |
| 8 | +#include <vector> |
| 9 | + |
// Signature of a generated code gadget; its single argument is the loop count.
using gadget = void (*)(size_t);

// When true, itlb_size() creates one jit mapping per page instead of a single
// large mapping.
bool avoid_hugepage_merging = false;
// Byte step applied per page to the branch offset inside the page.
int stride = 64;
// Page size to use instead of getpagesize(); -1 means "use the real one".
int fake_page_size = -1;
| 16 | + |
| 17 | +void itlb_size(FILE *fp) { |
| 18 | + int loop_count = 1000; |
| 19 | + uint64_t min_size = 1; |
| 20 | + uint64_t max_size = 4096; |
| 21 | + bind_to_core(); |
| 22 | + setup_perf_cycles(); |
| 23 | + |
| 24 | + size_t page_size = getpagesize(); |
| 25 | + if (fake_page_size != -1) { |
| 26 | + page_size = fake_page_size; |
| 27 | + } |
| 28 | + printf("Page Size: %ld\n", page_size); |
| 29 | + |
| 30 | + if (avoid_hugepage_merging) { |
| 31 | + printf("Avoiding hugepage merging\n"); |
| 32 | + printf("Please disable THP via: echo never " |
| 33 | + ">/sys/kernel/mm/transparent_hugepage/enabled\n"); |
| 34 | + } |
| 35 | + printf("Branch stride: %dB\n", stride); |
| 36 | + |
| 37 | + fprintf(fp, "size,min,avg,max\n"); |
| 38 | + for (uint64_t size = min_size; size <= max_size; size++) { |
| 39 | + gadget entry = NULL; |
| 40 | + jit *jit_main = NULL; |
| 41 | + std::vector<jit *> jit_pages; |
| 42 | + if (avoid_hugepage_merging) { |
| 43 | + // create each page separately |
| 44 | + // generate addresses |
| 45 | + std::vector<size_t> page_addrs; |
| 46 | + std::vector<size_t> addrs; |
| 47 | + size_t start_addr = 0x100000000; |
| 48 | + addrs.push_back(start_addr); |
| 49 | + page_addrs.push_back(start_addr); |
| 50 | + for (uint64_t i = 1; i < size; i++) { |
| 51 | + size_t addr = page_addrs[page_addrs.size() - 1]; |
| 52 | + addr += page_size; |
| 53 | + // span over multiple cachelines to avoid hitting icache capacity |
| 54 | + page_addrs.push_back(addr); |
| 55 | + addrs.push_back(addr + (i * stride) % page_size); |
| 56 | + } |
| 57 | + uint8_t *start = (uint8_t *)start_addr; |
| 58 | + for (uint64_t i = 0; i < size; i++) { |
| 59 | + jit *jit_page = new jit((void *)page_addrs[i], page_size); |
| 60 | + |
| 61 | + uint8_t *begin = (uint8_t *)addrs[i]; |
| 62 | + uint8_t *target = (uint8_t *)addrs[(i + 1) % addrs.size()]; |
| 63 | + jit_page->set_cur(begin); |
| 64 | + if (i < size - 1) { |
| 65 | +#if defined(HOST_AARCH64) |
| 66 | + jit_page->b(target); |
| 67 | +#elif defined(HOST_AMD64) |
| 68 | + jit_page->jmp5(target); |
| 69 | +#endif |
| 70 | + } else { |
| 71 | +#if defined(HOST_AARCH64) |
| 72 | + // subs x0, x0, #1 |
| 73 | + jit_page->subs64(0, 0, 1); |
| 74 | + uint8_t *end = jit_page->get_cur() + 8; |
| 75 | + // cbnz has limited imm range |
| 76 | + jit_page->cbz32(0, end); |
| 77 | + jit_page->b(start); |
| 78 | + // end: |
| 79 | + jit_page->ret(); |
| 80 | +#elif defined(HOST_AMD64) |
| 81 | + jit_page->dec_r32(jit::DI); |
| 82 | + jit_page->jnz6(start); |
| 83 | + jit_page->ret(); |
| 84 | +#endif |
| 85 | + } |
| 86 | + jit_page->protect(); |
| 87 | + jit_pages.push_back(jit_page); |
| 88 | + } |
| 89 | + entry = (gadget)start; |
| 90 | + } else { |
| 91 | + size_t mapped_size = page_size * size; |
| 92 | + mapped_size = (mapped_size + 0x10000) & -0x10000; |
| 93 | + uint8_t *start = (uint8_t *)0x100000000; |
| 94 | + jit_main = new jit(start, mapped_size); |
| 95 | + for (uint64_t i = 0; i < size; i++) { |
| 96 | + // span over multiple cachelines to avoid hitting icache capacity |
| 97 | + uint8_t *begin = start + i * page_size + (i * stride) % page_size; |
| 98 | + uint8_t *target = |
| 99 | + start + (i + 1) * page_size + ((i + 1) * stride) % page_size; |
| 100 | + jit_main->set_cur(begin); |
| 101 | + if (i < size - 1) { |
| 102 | +#if defined(HOST_AARCH64) |
| 103 | + jit_main->b(target); |
| 104 | +#elif defined(HOST_AMD64) |
| 105 | + jit_main->jmp5(target); |
| 106 | +#endif |
| 107 | + } else { |
| 108 | +#if defined(HOST_AARCH64) |
| 109 | + // subs x0, x0, #1 |
| 110 | + jit_main->subs64(0, 0, 1); |
| 111 | + uint8_t *end = jit_main->get_cur() + 8; |
| 112 | + // cbnz has limited imm range |
| 113 | + jit_main->cbz32(0, end); |
| 114 | + jit_main->b(start); |
| 115 | + // end: |
| 116 | + jit_main->ret(); |
| 117 | +#elif defined(HOST_AMD64) |
| 118 | + jit_main->dec_r32(jit::DI); |
| 119 | + jit_main->jnz6(start); |
| 120 | + jit_main->ret(); |
| 121 | +#endif |
| 122 | + } |
| 123 | + } |
| 124 | + jit_main->protect(); |
| 125 | + // jit_main->dump(); |
| 126 | + entry = (gadget)start; |
| 127 | + } |
| 128 | + |
| 129 | + std::vector<double> history; |
| 130 | + int iterations = 30; |
| 131 | + history.reserve(iterations); |
| 132 | + |
| 133 | + double sum = 0; |
| 134 | + // run several times |
| 135 | + for (int i = 0; i < iterations; i++) { |
| 136 | + uint64_t begin = perf_read_cycles(); |
| 137 | + entry(loop_count); |
| 138 | + uint64_t elapsed = perf_read_cycles() - begin; |
| 139 | + |
| 140 | + // skip warmup |
| 141 | + if (i >= 10) { |
| 142 | + double time = (double)elapsed / loop_count / size; |
| 143 | + history.push_back(time); |
| 144 | + sum += time; |
| 145 | + } |
| 146 | + } |
| 147 | + |
| 148 | + if (jit_main) { |
| 149 | + delete jit_main; |
| 150 | + } |
| 151 | + for (jit *page : jit_pages) { |
| 152 | + delete page; |
| 153 | + } |
| 154 | + |
| 155 | + double min = history[0]; |
| 156 | + double max = history[0]; |
| 157 | + for (size_t i = 0; i < history.size(); i++) { |
| 158 | + if (min > history[i]) { |
| 159 | + min = history[i]; |
| 160 | + } |
| 161 | + if (max < history[i]) { |
| 162 | + max = history[i]; |
| 163 | + } |
| 164 | + } |
| 165 | + fprintf(fp, "%ld,%.2lf,%.2lf,%.2lf\n", size, min, sum / history.size(), |
| 166 | + max); |
| 167 | + fflush(fp); |
| 168 | + } |
| 169 | +} |
0 commit comments