|
17 | 17 | * |
18 | 18 | */ |
19 | 19 |
|
| 20 | +#include <cpuid.h> |
20 | 21 | #include <errno.h> |
21 | 22 | #include <stdlib.h> |
| 23 | +#include <unistd.h> |
22 | 24 |
|
23 | 25 | #include "manifest.h" |
24 | 26 | #include "swupd.h" |
25 | 27 |
|
26 | 28 | #define MANIFEST_LINE_MAXLEN (PATH_MAX * 2) |
27 | 29 | #define MANIFEST_HEADER "MANIFEST\t" |
28 | 30 |
|
/* Marker-file paths (joined with globals.path_prefix before use) that are
 * probed with access() in get_opt_level_mask() to decide whether the system
 * should avoid AVX2- or AVX512-optimized binaries. */
const char AVX2_SKIP_FILE[] = "/etc/clear/elf-replace-avx2";
const char AVX512_SKIP_FILE[] = "/etc/clear/elf-replace-avx512";
| 33 | + |
29 | 34 | /* Below generated with the following: |
30 | 35 |
|
31 | 36 | unsigned char lt[256] = ".acdefghijklmnopqrtuvwxyzABDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#^*bsC"; |
|
36 | 41 | ltr[0] = 0; |
37 | 42 | ltr['b'] = ltr['s'] = ltr['C'] = 0; |
38 | 43 |
|
39 | | - printf("static unsigned char OPTIMIZED_BITMASKS[256] = { "); |
| 44 | + printf("static unsigned char OPTIMIZED_BITMASKS[256] = {\n"); |
40 | 45 | for (int i = 0; i < 256; i++) { |
41 | 46 | if (i < 255) { |
42 | | - printf("0x%X, ", ltr[i]); |
| 47 | + printf("0x%X,\n", ltr[i]); |
43 | 48 | } else { |
44 | | - printf("0x%X ", ltr[i]); |
| 49 | + printf("0x%X\n", ltr[i]); |
45 | 50 | } |
46 | 51 | } |
47 | 52 | printf("};\n"); |
|
/* Changes to the OPTIMIZED_BITMASKS array require a format bump and corresponding mixer change */
/* Maps a manifest flag character (indexed by its ASCII value) to a compact
 * optimization code; 0 for characters that carry no optimization variant.
 * Generated by the snippet in the comment above — do not edit by hand. */
static unsigned char OPTIMIZED_BITMASKS[256] = { 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X3C, 0X0, 0X3D, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X3F, 0X0, 0X0, 0X0, 0X0, 0X0, 0X32, 0X33, 0X34, 0X35, 0X36, 0X37, 0X38, 0X39, 0X3A, 0X3B, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X19, 0X1A, 0X0, 0X1B, 0X1C, 0X1D, 0X1E, 0X1F, 0X20, 0X21, 0X22, 0X23, 0X24, 0X25, 0X26, 0X27, 0X28, 0X29, 0X2A, 0X2B, 0X2C, 0X2D, 0X2E, 0X2F, 0X30, 0X31, 0X0, 0X0, 0X0, 0X3E, 0X0, 0X0, 0X1, 0X0, 0X2, 0X3, 0X4, 0X5, 0X6, 0X7, 0X8, 0X9, 0XA, 0XB, 0XC, 0XD, 0XE, 0XF, 0X10, 0X11, 0X0, 0X12, 0X13, 0X14, 0X15, 0X16, 0X17, 0X18, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0 };
52 | 57 |
|
53 | | -int set_opts(struct file *file, unsigned char mask) |
/* Any changes to LOOKUP_OPTIMIZED_BITMASKS must match mixer,
   requires format bump */
/* Expands a compact optimization code (from OPTIMIZED_BITMASKS) into the
 * two-byte mask consumed by use_file():
 *   high byte (mask >> 8)  = the optimization level this file is built for
 *   low byte  (mask & 255) = the set of levels the file is published at
 * Unlisted indices default to 0, i.e. an SSE-only mask (SSE is zero). */
static uint64_t LOOKUP_OPTIMIZED_BITMASKS[256] = {
	[SSE_0] = (SSE << 8) | SSE,                     /* SSE build; SSE only */
	[SSE_1] = (SSE << 8) | SSE | AVX2,              /* SSE build; AVX2 sibling exists */
	[SSE_2] = (SSE << 8) | SSE | AVX512,            /* SSE build; AVX512 sibling exists */
	[SSE_3] = (SSE << 8) | SSE | AVX2 | AVX512,     /* SSE build; both siblings exist */
	[AVX2_1] = (AVX2 << 8) | SSE | AVX2,            /* AVX2 build */
	[AVX2_3] = (AVX2 << 8) | SSE | AVX2 | AVX512,   /* AVX2 build; full set exists */
	[AVX512_2] = (AVX512 << 8) | SSE | AVX512,      /* AVX512 build */
	[AVX512_3] = (AVX512 << 8) | SSE | AVX2 | AVX512, /* AVX512 build; full set exists */
};
| 70 | + |
/* some CPUs support avx512 but it's not helpful for performance ... skip avx512 there */
int avx512_is_unhelpful(void)
{
	uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
	uint32_t model;

	/* currently only Intel cpus are on the list */
	if (!__builtin_cpu_is("intel")) {
		return 0;
	}

	/* CPUID leaf 1: combined model = (extended model, bits 19:16) << 4
	   merged with the base model in bits 7:4 */
	__cpuid(1, eax, ebx, ecx, edx);
	model = ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);

	switch (model) {
	case 0x8C: /* tigerlake */
	case 0x8D: /* tigerlake */
		return 1;
	default:
		return 0;
	}
}
| 95 | + |
| 96 | +uint64_t get_opt_level_mask(void) |
| 97 | +{ |
| 98 | + char *avx2_skip_file = NULL; |
| 99 | + char *avx512_skip_file = NULL; |
| 100 | + uint64_t opt_level_mask = 0; |
| 101 | + |
| 102 | + avx2_skip_file = sys_path_join("%s/%s", globals.path_prefix, AVX2_SKIP_FILE); |
| 103 | + avx512_skip_file = sys_path_join("%s/%s", globals.path_prefix, AVX512_SKIP_FILE); |
| 104 | + |
| 105 | + if (__builtin_cpu_supports("avx512vl")) { |
| 106 | + opt_level_mask = (AVX512 << 8) | AVX512 | AVX2 | SSE; |
| 107 | + } |
| 108 | + else if (__builtin_cpu_supports("avx2") && __builtin_cpu_supports("fma") ) { |
| 109 | + opt_level_mask = (AVX2 << 8) | AVX2 | SSE; |
| 110 | + } else { |
| 111 | + /* SSE is zero but keep the same logic as others */ |
| 112 | + opt_level_mask = (SSE << 8) | SSE; |
| 113 | + } |
| 114 | + |
| 115 | + if (opt_level_mask & AVX512 && avx512_is_unhelpful()) { |
| 116 | + /* Pretend we are AVX2 only */ |
| 117 | + opt_level_mask = (AVX2 << 8) | AVX2 | SSE; |
91 | 118 | } |
92 | | - return 0; |
| 119 | + |
| 120 | + if (access(avx512_skip_file, F_OK)) { |
| 121 | + if ((opt_level_mask >> 8) & AVX512) { |
| 122 | + /* Downgrade the entire system to AVX2 */ |
| 123 | + opt_level_mask = (AVX2 << 8) | AVX2 | SSE; |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + if (access(avx2_skip_file, F_OK)) { |
| 128 | + if ((opt_level_mask >> 8) & AVX2) { |
| 129 | + /* Downgrade the entire system to SSE */ |
| 130 | + opt_level_mask = (SSE << 8) | SSE; |
| 131 | + } else { |
| 132 | + /* Otherwise just remove AVX2 support from the flags */ |
| 133 | + opt_level_mask &= (uint64_t)~(1 << AVX2); |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + FREE(avx2_skip_file); |
| 138 | + FREE(avx512_skip_file); |
| 139 | + |
| 140 | + return opt_level_mask; |
| 141 | +} |
| 142 | + |
| 143 | +bool use_file(uint64_t file_mask, uint64_t sys_mask) |
| 144 | +{ |
| 145 | + /* Order is very intentional for these tests */ |
| 146 | + unsigned char file_options = file_mask & 255; |
| 147 | + unsigned char sys_options = sys_mask & 255; |
| 148 | + |
| 149 | + /* First SSE check is a slight performance optimization */ |
| 150 | + /* If the file only supports SSE, just use it */ |
| 151 | + if (file_options == SSE) { |
| 152 | + return true; |
| 153 | + } |
| 154 | + |
| 155 | + /* This must be done seperately as SSE == 0 and so the mask tests |
| 156 | + after this won't detect SSE files matching */ |
| 157 | + /* If the system and file only have SSE in common, use SSE */ |
| 158 | + if ((sys_options & file_options) == SSE) { |
| 159 | + if ((file_mask >> 8) == SSE) { |
| 160 | + return true; |
| 161 | + } else { |
| 162 | + return false; |
| 163 | + } |
| 164 | + } |
| 165 | + |
| 166 | + /* Another check that is a slight performance optimization by |
| 167 | + checking a special case to return early. |
| 168 | + The SSE matching earlier is important as it would have been |
| 169 | + missed by this check. */ |
| 170 | + /* If the file isn't supported by the system don't use it */ |
| 171 | + if (!((file_mask >> 8) & sys_mask)) { |
| 172 | + return false; |
| 173 | + } |
| 174 | + |
| 175 | + /* Look for the file with the system's highest supported optimization level */ |
| 176 | + for (int i = MAX_OPTIMIZED_BIT_POWER; i >= 0; i--) { |
| 177 | + /* cur will eventually be non-zero as a previous check ensures that |
| 178 | + the condition of sys_options and file_options without any bits |
| 179 | + matching has already been handled */ |
| 180 | + unsigned char cur = (sys_options & (1 << i)) & (file_options & (1 << i)); |
| 181 | + if (cur) { |
| 182 | + /* best match between sys and file, use it if this file has the |
| 183 | + current optimization level */ |
| 184 | + if (cur & (file_mask >> 8)) { |
| 185 | + return true; |
| 186 | + } else { |
| 187 | + return false; |
| 188 | + } |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + /* It shouldn't be possible to get here but don't do evil just in case */ |
| 193 | + error("File optimization match failure\n"); |
| 194 | + return (sys_options & (file_mask >> 8)) != 0; |
93 | 195 | } |
94 | 196 |
|
95 | 197 | struct manifest *manifest_parse(const char *component, const char *filename, bool header_only) |
@@ -216,6 +318,8 @@ struct manifest *manifest_parse(const char *component, const char *filename, boo |
216 | 318 | /* empty line */ |
217 | 319 | while (!feof(infile)) { |
218 | 320 | struct file *file; |
| 321 | + uint64_t bitmask_translation; |
| 322 | + uint64_t file_mask; |
219 | 323 |
|
220 | 324 | if (fgets(line, MANIFEST_LINE_MAXLEN, infile) == NULL) { |
221 | 325 | break; |
@@ -259,14 +363,24 @@ struct manifest *manifest_parse(const char *component, const char *filename, boo |
259 | 363 | file->is_experimental = 1; |
260 | 364 | } |
261 | 365 |
|
262 | | - if (set_opts(file, OPTIMIZED_BITMASKS[(unsigned char)line[2]])) { |
| 366 | + bitmask_translation = OPTIMIZED_BITMASKS[(unsigned char)line[2]]; |
| 367 | + if (bitmask_translation > AVX512_3) { |
| 368 | + error("Skipping unsupported file optimization level\n"); |
263 | 369 | FREE(file); |
264 | 370 | continue; |
265 | 371 | } |
266 | | - if (file->opt_level != SSE) { |
| 372 | + file_mask = LOOKUP_OPTIMIZED_BITMASKS[bitmask_translation]; |
| 373 | + if (str_cmp("/", globals.path_prefix) != 0) { |
| 374 | + /* Don't use optimized binaries when prefix is set */ |
| 375 | + /* as the operation might not be for the host system */ |
| 376 | + /* so default to SSE for this case. */ |
| 377 | + globals.opt_level_mask = (SSE << 8) | SSE; |
| 378 | + } |
| 379 | + if (!use_file(file_mask, globals.opt_level_mask)) { |
267 | 380 | FREE(file); |
268 | 381 | continue; |
269 | 382 | } |
| 383 | + file->opt_mask = file_mask; |
270 | 384 |
|
271 | 385 | if (line[3] == 'r') { |
272 | 386 | /* rename flag is ignored */ |
|
0 commit comments