Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.

Commit ce82b5d

Browse files
William Douglas and bryteise
authored and committed
Enable use of optimized manifest files beyond SSE
Allow the usage of non-SSE files from manifests. This involves testing what the currently running system supports for optimized content and installing the best matching available binary the manifest provides. There is an exception for operations where the prefix is set to something other than the rootfs where SSE binaries will be used in all cases. Signed-off-by: William Douglas <[email protected]>
1 parent 5ffe228 commit ce82b5d

File tree

7 files changed

+241
-88
lines changed

7 files changed

+241
-88
lines changed

src/swupd.h

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -109,20 +109,6 @@ struct update_stat {
109109
/* +1 for null termination */
110110
#define SWUPD_HASH_LEN (DIGEST_LEN_SHA256 + 1)
111111

112-
/* Any changes here must match mixer, requires format bump */
113-
#define SSE 0
114-
#define AVX2 1 << 0
115-
#define AVX512 1 << 1
116-
117-
#define SSE_0 0x0
118-
#define SSE_1 0x1
119-
#define SSE_2 0x2
120-
#define SSE_3 0x3
121-
#define AVX2_1 0x4
122-
#define AVX2_3 0x5
123-
#define AVX512_2 0x6
124-
#define AVX512_3 0x7
125-
126112
struct file {
127113
char *filename;
128114
char hash[SWUPD_HASH_LEN];
@@ -146,9 +132,7 @@ struct file {
146132
unsigned int is_exported : 1;
147133
unsigned int do_not_update : 1;
148134

149-
unsigned char opt_mask;
150-
unsigned char opt_level;
151-
unsigned char available_levels;
135+
uint64_t opt_mask;
152136

153137
struct file *peer; /* same file in another manifest */
154138
struct header *header;

src/swupd_lib/globals.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "lib/log.h"
3333
#include "swupd.h"
34+
#include "manifest.h"
3435

3536
/* These defines have to be different from the local options
3637
* so they don't interfere with each other */
@@ -51,6 +52,7 @@ struct globals globals = {
5152
.max_retries = DEFAULT_MAX_RETRIES,
5253
.retry_delay = DEFAULT_RETRY_DELAY,
5354
.update_server_port = -1,
55+
.opt_level_mask = 0,
5456
};
5557

5658
struct globals_bkp globals_bkp;
@@ -451,6 +453,8 @@ bool globals_init(void)
451453
globals.global_times = timelist_new();
452454
}
453455

456+
globals.opt_level_mask = get_opt_level_mask();
457+
454458
/* backup the global variables that are likely to be modified by
455459
* some processes, like when working with 3rd-party repositories
456460
* so we can recover them */

src/swupd_lib/globals.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
*/
1212

1313
#include <stdbool.h>
14+
#include <stdint.h>
1415

1516
#include "lib/timelist.h"
1617

@@ -57,6 +58,7 @@ extern struct globals {
5758
int skip_optional_bundles;
5859
int update_server_port;
5960
int user_interaction;
61+
uint64_t opt_level_mask;
6062
timelist *global_times;
6163
} globals;
6264

src/swupd_lib/manifest.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,20 @@
1717
extern "C" {
1818
#endif
1919

20+
#define MAX_OPTIMIZED_BIT_POWER 1
21+
#define SSE 0
22+
#define AVX2 1 << 0
23+
#define AVX512 1 << 1
24+
25+
#define SSE_0 0
26+
#define SSE_1 1
27+
#define SSE_2 2
28+
#define SSE_3 3
29+
#define AVX2_1 4
30+
#define AVX2_3 5
31+
#define AVX512_2 6
32+
#define AVX512_3 7
33+
2034
struct manifest {
2135
// Header
2236
int manifest_version;
@@ -35,6 +49,13 @@ struct manifest {
3549
struct list *submanifests; /* struct manifest for subscribed manifests */
3650
};
3751

52+
/**
53+
* @brief Get the optimization mask for the current system.
54+
*
55+
* @returns The system optimization level (shifted left by 8 bits) bitwise ORed with the optimization levels supported by the system.
56+
*/
57+
uint64_t get_opt_level_mask(void);
58+
3859
/**
3960
* @brief Parse manifest located on disk under 'filename' and set its name to 'component'.
4061
* @param header_only If set don't parse manifest files.

src/swupd_lib/manifest_parser.c

Lines changed: 157 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,20 @@
1717
*
1818
*/
1919

20+
#include <cpuid.h>
2021
#include <errno.h>
2122
#include <stdlib.h>
23+
#include <unistd.h>
2224

2325
#include "manifest.h"
2426
#include "swupd.h"
2527

2628
#define MANIFEST_LINE_MAXLEN (PATH_MAX * 2)
2729
#define MANIFEST_HEADER "MANIFEST\t"
2830

31+
const char AVX2_SKIP_FILE[] = "/etc/clear/elf-replace-avx2";
32+
const char AVX512_SKIP_FILE[] = "/etc/clear/elf-replace-avx512";
33+
2934
/* Below generated with the following:
3035
3136
unsigned char lt[256] = ".acdefghijklmnopqrtuvwxyzABDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#^*bsC";
@@ -36,12 +41,12 @@
3641
ltr[0] = 0;
3742
ltr['b'] = ltr['s'] = ltr['C'] = 0;
3843
39-
printf("static unsigned char OPTIMIZED_BITMASKS[256] = { ");
44+
printf("static unsigned char OPTIMIZED_BITMASKS[256] = {\n");
4045
for (int i = 0; i < 256; i++) {
4146
if (i < 255) {
42-
printf("0x%X, ", ltr[i]);
47+
printf("0x%X,\n", ltr[i]);
4348
} else {
44-
printf("0x%X ", ltr[i]);
49+
printf("0x%X\n", ltr[i]);
4550
}
4651
}
4752
printf("};\n");
@@ -50,46 +55,143 @@
5055
/* Changes to the OPTIMIZED_BITMASKS array require a format bump and corresponding mixer change */
5156
static unsigned char OPTIMIZED_BITMASKS[256] = { 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X3C, 0X0, 0X3D, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X3F, 0X0, 0X0, 0X0, 0X0, 0X0, 0X32, 0X33, 0X34, 0X35, 0X36, 0X37, 0X38, 0X39, 0X3A, 0X3B, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X19, 0X1A, 0X0, 0X1B, 0X1C, 0X1D, 0X1E, 0X1F, 0X20, 0X21, 0X22, 0X23, 0X24, 0X25, 0X26, 0X27, 0X28, 0X29, 0X2A, 0X2B, 0X2C, 0X2D, 0X2E, 0X2F, 0X30, 0X31, 0X0, 0X0, 0X0, 0X3E, 0X0, 0X0, 0X1, 0X0, 0X2, 0X3, 0X4, 0X5, 0X6, 0X7, 0X8, 0X9, 0XA, 0XB, 0XC, 0XD, 0XE, 0XF, 0X10, 0X11, 0X0, 0X12, 0X13, 0X14, 0X15, 0X16, 0X17, 0X18, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0, 0X0 };
5257

53-
int set_opts(struct file *file, unsigned char mask)
58+
/* Any changes to LOOKUP_OPTIMIZED_BITMASKS must match mixer,
59+
requires format bump */
60+
static uint64_t LOOKUP_OPTIMIZED_BITMASKS[256] = {
61+
[SSE_0] = (SSE << 8) | SSE,
62+
[SSE_1] = (SSE << 8) | SSE | AVX2,
63+
[SSE_2] = (SSE << 8) | SSE | AVX512,
64+
[SSE_3] = (SSE << 8) | SSE | AVX2 | AVX512,
65+
[AVX2_1] = (AVX2 << 8) | SSE | AVX2,
66+
[AVX2_3] = (AVX2 << 8) | SSE | AVX2 | AVX512,
67+
[AVX512_2] = (AVX512 << 8) | SSE | AVX512,
68+
[AVX512_3] = (AVX512 << 8) | SSE | AVX2 | AVX512,
69+
};
70+
71+
/* some CPUs support avx512 but it's not helpful for performance ... skip avx512 there */
72+
int avx512_is_unhelpful(void)
5473
{
55-
file->opt_mask = mask;
56-
switch (file->opt_mask) {
57-
case SSE_0:
58-
file->opt_level = SSE;
59-
file->available_levels = SSE;
60-
break;
61-
case SSE_1:
62-
file->opt_level = SSE;
63-
file->available_levels = SSE | AVX2;
64-
break;
65-
case SSE_2:
66-
file->opt_level = SSE;
67-
file->available_levels = SSE | AVX512;
68-
break;
69-
case SSE_3:
70-
file->opt_level = SSE;
71-
file->available_levels = SSE | AVX2 | AVX512;
72-
break;
73-
case AVX2_1:
74-
file->opt_level = AVX2;
75-
file->available_levels = SSE | AVX2;
76-
break;
77-
case AVX2_3:
78-
file->opt_level = AVX2;
79-
file->available_levels = SSE | AVX2 | AVX512;
80-
break;
81-
case AVX512_2:
82-
file->opt_level = AVX512;
83-
file->available_levels = SSE | AVX512;
84-
break;
85-
case AVX512_3:
86-
file->opt_level = AVX512;
87-
file->available_levels = SSE | AVX2 | AVX512;
88-
break;
89-
default:
90-
return -1;
74+
uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
75+
uint32_t model;
76+
77+
/* currently only Intel cpus are on the list */
78+
if (!__builtin_cpu_is("intel"))
79+
return 0;
80+
81+
__cpuid(1, eax, ebx, ecx, edx);
82+
model = (eax >> 4) & 0xf;
83+
model += ((eax >> (16-4)) & 0xf0);
84+
85+
/* tigerlake */
86+
if (model == 0x8C) {
87+
return 1;
88+
}
89+
if (model == 0x8D) {
90+
return 1;
91+
}
92+
93+
return 0;
94+
}
95+
96+
uint64_t get_opt_level_mask(void)
97+
{
98+
char *avx2_skip_file = NULL;
99+
char *avx512_skip_file = NULL;
100+
uint64_t opt_level_mask = 0;
101+
102+
avx2_skip_file = sys_path_join("%s/%s", globals.path_prefix, AVX2_SKIP_FILE);
103+
avx512_skip_file = sys_path_join("%s/%s", globals.path_prefix, AVX512_SKIP_FILE);
104+
105+
if (__builtin_cpu_supports("avx512vl")) {
106+
opt_level_mask = (AVX512 << 8) | AVX512 | AVX2 | SSE;
107+
}
108+
else if (__builtin_cpu_supports("avx2") && __builtin_cpu_supports("fma") ) {
109+
opt_level_mask = (AVX2 << 8) | AVX2 | SSE;
110+
} else {
111+
/* SSE is zero but keep the same logic as others */
112+
opt_level_mask = (SSE << 8) | SSE;
113+
}
114+
115+
if (opt_level_mask & AVX512 && avx512_is_unhelpful()) {
116+
/* Pretend we are AVX2 only */
117+
opt_level_mask = (AVX2 << 8) | AVX2 | SSE;
91118
}
92-
return 0;
119+
120+
if (access(avx512_skip_file, F_OK)) {
121+
if ((opt_level_mask >> 8) & AVX512) {
122+
/* Downgrade the entire system to AVX2 */
123+
opt_level_mask = (AVX2 << 8) | AVX2 | SSE;
124+
}
125+
}
126+
127+
if (access(avx2_skip_file, F_OK)) {
128+
if ((opt_level_mask >> 8) & AVX2) {
129+
/* Downgrade the entire system to SSE */
130+
opt_level_mask = (SSE << 8) | SSE;
131+
} else {
132+
/* Otherwise just remove AVX2 support from the flags */
133+
opt_level_mask &= (uint64_t)~(1 << AVX2);
134+
}
135+
}
136+
137+
FREE(avx2_skip_file);
138+
FREE(avx512_skip_file);
139+
140+
return opt_level_mask;
141+
}
142+
143+
bool use_file(uint64_t file_mask, uint64_t sys_mask)
144+
{
145+
/* Order is very intentional for these tests */
146+
unsigned char file_options = file_mask & 255;
147+
unsigned char sys_options = sys_mask & 255;
148+
149+
/* First SSE check is a slight performance optimization */
150+
/* If the file only supports SSE, just use it */
151+
if (file_options == SSE) {
152+
return true;
153+
}
154+
155+
/* This must be done seperately as SSE == 0 and so the mask tests
156+
after this won't detect SSE files matching */
157+
/* If the system and file only have SSE in common, use SSE */
158+
if ((sys_options & file_options) == SSE) {
159+
if ((file_mask >> 8) == SSE) {
160+
return true;
161+
} else {
162+
return false;
163+
}
164+
}
165+
166+
/* Another check that is a slight performance optimization by
167+
checking a special case to return early.
168+
The SSE matching earlier is important as it would have been
169+
missed by this check. */
170+
/* If the file isn't supported by the system don't use it */
171+
if (!((file_mask >> 8) & sys_mask)) {
172+
return false;
173+
}
174+
175+
/* Look for the file with the system's highest supported optimization level */
176+
for (int i = MAX_OPTIMIZED_BIT_POWER; i >= 0; i--) {
177+
/* cur will eventually be non-zero as a previous check ensures that
178+
the condition of sys_options and file_options without any bits
179+
matching has already been handled */
180+
unsigned char cur = (sys_options & (1 << i)) & (file_options & (1 << i));
181+
if (cur) {
182+
/* best match between sys and file, use it if this file has the
183+
current optimization level */
184+
if (cur & (file_mask >> 8)) {
185+
return true;
186+
} else {
187+
return false;
188+
}
189+
}
190+
}
191+
192+
/* It shouldn't be possible to get here but don't do evil just in case */
193+
error("File optimization match failure\n");
194+
return (sys_options & (file_mask >> 8)) != 0;
93195
}
94196

95197
struct manifest *manifest_parse(const char *component, const char *filename, bool header_only)
@@ -216,6 +318,8 @@ struct manifest *manifest_parse(const char *component, const char *filename, boo
216318
/* empty line */
217319
while (!feof(infile)) {
218320
struct file *file;
321+
uint64_t bitmask_translation;
322+
uint64_t file_mask;
219323

220324
if (fgets(line, MANIFEST_LINE_MAXLEN, infile) == NULL) {
221325
break;
@@ -259,14 +363,24 @@ struct manifest *manifest_parse(const char *component, const char *filename, boo
259363
file->is_experimental = 1;
260364
}
261365

262-
if (set_opts(file, OPTIMIZED_BITMASKS[(unsigned char)line[2]])) {
366+
bitmask_translation = OPTIMIZED_BITMASKS[(unsigned char)line[2]];
367+
if (bitmask_translation > AVX512_3) {
368+
error("Skipping unsupported file optimization level\n");
263369
FREE(file);
264370
continue;
265371
}
266-
if (file->opt_level != SSE) {
372+
file_mask = LOOKUP_OPTIMIZED_BITMASKS[bitmask_translation];
373+
if (str_cmp("/", globals.path_prefix) != 0) {
374+
/* Don't use optimized binaries when prefix is set */
375+
/* as the operation might not be for the host system */
376+
/* so default to SSE for this case. */
377+
globals.opt_level_mask = (SSE << 8) | SSE;
378+
}
379+
if (!use_file(file_mask, globals.opt_level_mask)) {
267380
FREE(file);
268381
continue;
269382
}
383+
file->opt_mask = file_mask;
270384

271385
if (line[3] == 'r') {
272386
/* rename flag is ignored */

test/unit/data/mom2

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ X... 0000000000000000000000000000000000000000000000000000000000000004 30 f4
2424
..a. 0000000000000000000000000000000000000000000000000000000000000013 30 f13
2525
..c. 0000000000000000000000000000000000000000000000000000000000000014 30 f14
2626
..d. 0000000000000000000000000000000000000000000000000000000000000015 30 f15
27-
..e. 0000000000000000000000000000000000000000000000000000000000000016 30 f13
28-
..f. 0000000000000000000000000000000000000000000000000000000000000017 30 f15
29-
..g. 0000000000000000000000000000000000000000000000000000000000000018 30 f14
30-
..h. 0000000000000000000000000000000000000000000000000000000000000019 30 f15
27+
..e. 0000000000000000000000000000000000000000000000000000000000000113 30 f13
28+
..f. 0000000000000000000000000000000000000000000000000000000000000115 30 f15
29+
..g. 0000000000000000000000000000000000000000000000000000000000000214 30 f14
30+
..h. 0000000000000000000000000000000000000000000000000000000000000215 30 f15
3131
..i. 0000000000000000000000000000000000000000000000000000000000000020 30 fxy
3232
..j. 0000000000000000000000000000000000000000000000000000000000000021 30 fxy
3333
..k. 0000000000000000000000000000000000000000000000000000000000000022 30 fxy

0 commit comments

Comments
 (0)