Skip to content

Commit fd69ba3

Browse files
authored
Adding support for group_any in iree_thread_affinity_t. (iree-org#21089)
This allows code launching threads to specify that a thread should be assigned to a processor associated with a specific group (Windows GROUP_AFFINITY, Linux NUMA node ID, etc.) instead of a specific processor within the group. This is useful for threads created in various parts of the codebase that don't globally coordinate (service workers/etc.), while things like the task system that explicitly lay out large collections of threads will continue to explicitly assign them.
1 parent c2e314a commit fd69ba3

File tree

10 files changed

+223
-61
lines changed

10 files changed

+223
-61
lines changed

runtime/src/iree/base/internal/threading.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ void iree_thread_affinity_set_any(iree_thread_affinity_t* out_thread_affinity) {
4141
memset(out_thread_affinity, 0x00, sizeof(*out_thread_affinity));
4242
}
4343

44+
// Initializes |out_thread_affinity| so that it targets processor group |group|
// as a whole rather than one processor within it. All other fields are
// cleared to zero.
void iree_thread_affinity_set_group_any(
    uint32_t group, iree_thread_affinity_t* out_thread_affinity) {
  iree_thread_affinity_t* affinity = out_thread_affinity;
  // Start from the all-zero state so no stale processor ID/SMT bits remain.
  memset(affinity, 0, sizeof(iree_thread_affinity_t));
  affinity->group = group;
  affinity->group_any = 1;
}
50+
4451
//==============================================================================
4552
// iree_thread_override_list_t
4653
//==============================================================================

runtime/src/iree/base/internal/threading.h

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,26 +87,45 @@ typedef enum iree_thread_priority_class_e {
8787
// id: GROUP_AFFINITY::Mask bit/PROCESSOR_NUMBER::Number.
8888
// smt: whether to set both the base ID and the subsequent ID in Mask.
8989
typedef struct iree_thread_affinity_t {
90-
// When 0 the affinity is undefined and the system may place the thread
91-
// anywhere and migrate it as much as it likes. In practice it may do that
92-
// even when specified.
93-
uint32_t specified : 1;
94-
// When 1 and the specified processor is part of an SMT set all logical cores
95-
// in the set should be reserved for the thread to avoid contention.
96-
uint32_t smt : 1;
90+
// When 1 the processor ID will be ignored and the platform will choose any
91+
// processor associated with the specified group (NUMA node ID).
92+
uint32_t group_any : 1;
9793
// Processor group the thread should be assigned to, aka NUMA node, cluster,
9894
// etc depending on platform. On platforms where the processor ID is unique
9995
// for the purposes of scheduling (e.g. Linux) this is used for related APIs
100-
// like mbind/set_mempolicy.
101-
uint32_t group : 7;
96+
// like mbind/set_mempolicy. If group_any is set and id_assigned is not then
97+
// any processor associated with the group will be used.
98+
uint32_t group : 8;
99+
100+
uint32_t reserved : 23;
101+
102+
// When 0 no specific processor ID is requested; unless group_any is set the
103+
// system may place the thread anywhere and migrate it as much as it likes.
104+
// In practice it may do that even when an ID is specified.
105+
uint32_t id_assigned : 1;
102106
// Processor ID the thread should be scheduled on. The interpretation and
103107
// efficacy of this request varies per platform.
104-
uint32_t id : 23;
108+
uint32_t id : 30;
109+
// When 1 and the specified processor ID is part of an SMT set all logical
110+
// cores in the set should be reserved for the thread to avoid contention.
111+
uint32_t smt : 1;
105112
} iree_thread_affinity_t;
106113

107-
// Sets |thread_affinity| to match with any processor in the system.
114+
// Sets |out_thread_affinity| to match with any processor in the system.
108115
void iree_thread_affinity_set_any(iree_thread_affinity_t* out_thread_affinity);
109116

117+
// Returns true if |thread_affinity| does not specify any particular processor.
118+
static inline bool iree_thread_affinity_is_unspecified(
119+
iree_thread_affinity_t thread_affinity) {
120+
return !thread_affinity.group_any && !thread_affinity.id_assigned;
121+
}
122+
123+
// Sets |out_thread_affinity| to match all processors associated with the given
124+
// processor group (aka NUMA node ID). Any processor within the group may be
125+
// selected by the platform.
126+
void iree_thread_affinity_set_group_any(
127+
uint32_t group, iree_thread_affinity_t* out_thread_affinity);
128+
110129
// Thread creation parameters.
111130
// All are optional and the entire struct can safely be zero-initialized.
112131
typedef struct iree_thread_create_params_t {

runtime/src/iree/base/internal/threading_darwin.c

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
116116

117117
// Ensure we start with the right QoS class.
118118
qos_class_t qos_class;
119-
if (params.initial_affinity.specified && params.initial_affinity.smt) {
119+
if (params.initial_affinity.id_assigned && params.initial_affinity.smt) {
120120
qos_class = QOS_CLASS_BACKGROUND;
121121
} else {
122122
qos_class = iree_thread_qos_class_for_priority_class(params.priority_class);
@@ -148,7 +148,7 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
148148
}
149149

150150
thread->mach_port = pthread_mach_thread_np(thread->handle);
151-
if (params.initial_affinity.specified) {
151+
if (!iree_thread_affinity_is_unspecified(params.initial_affinity)) {
152152
iree_thread_request_affinity(thread, params.initial_affinity);
153153
}
154154

@@ -210,22 +210,29 @@ void iree_thread_override_end(iree_thread_override_t* override) {
210210

211211
void iree_thread_request_affinity(iree_thread_t* thread,
212212
iree_thread_affinity_t affinity) {
213-
if (!affinity.specified) return;
214213
IREE_TRACE_ZONE_BEGIN(z0);
215214

216-
// Use mach_task_self when the caller requesting the affinity change is the
217-
// thread being changed.
218-
mach_port_t thread_port =
219-
thread->handle == pthread_self() ? mach_task_self() : thread->mach_port;
220-
221-
// See:
222-
// https://gist.github.com/Coneko/4234842
223-
// https://fergofrog.com/code/cbowser/xnu/osfmk/mach/thread_policy.h.html
224-
// http://www.hybridkernel.com/2015/01/18/binding_threads_to_cores_osx.html
225-
thread_affinity_policy_data_t policy_data = {affinity.id};
226-
thread_policy_set(thread_port, THREAD_AFFINITY_POLICY,
227-
(thread_policy_t)(&policy_data),
228-
THREAD_AFFINITY_POLICY_COUNT);
215+
// NOTE: group affinity is not yet supported, only ID affinity.
216+
// When the ID is not assigned we should really clear the policy but that
217+
// doesn't seem possible. Today we don't migrate affinities in a way where
218+
// we'd ever want to do anything but assign new ones so this is ok. The kernel
219+
// is allowed to totally ignore the affinity request and interpret it however
220+
// it likes so this is all not critical anyway.
221+
if (affinity.id_assigned) {
222+
// Use mach_task_self when the caller requesting the affinity change is the
223+
// thread being changed.
224+
mach_port_t thread_port =
225+
thread->handle == pthread_self() ? mach_task_self() : thread->mach_port;
226+
227+
// See:
228+
// https://gist.github.com/Coneko/4234842
229+
// https://fergofrog.com/code/cbowser/xnu/osfmk/mach/thread_policy.h.html
230+
// http://www.hybridkernel.com/2015/01/18/binding_threads_to_cores_osx.html
231+
thread_affinity_policy_data_t policy_data = {affinity.id};
232+
thread_policy_set(thread_port, THREAD_AFFINITY_POLICY,
233+
(thread_policy_t)(&policy_data),
234+
THREAD_AFFINITY_POLICY_COUNT);
235+
}
229236

230237
IREE_TRACE_ZONE_END(z0);
231238
}

runtime/src/iree/base/internal/threading_pthreads.c

Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
179179
if (params.priority_class != IREE_THREAD_PRIORITY_CLASS_NORMAL) {
180180
iree_thread_set_priority_class(thread, params.priority_class);
181181
}
182-
if (params.initial_affinity.specified) {
182+
if (!iree_thread_affinity_is_unspecified(params.initial_affinity)) {
183183
iree_thread_request_affinity(thread, params.initial_affinity);
184184
}
185185

@@ -297,17 +297,124 @@ void iree_thread_override_end(iree_thread_override_t* override) {
297297
IREE_TRACE_ZONE_END(z0);
298298
}
299299

300+
// Marks every CPU index in [0, CPU_SETSIZE) as allowed in |out_set|.
// A set built this way lets the platform place the thread on any CPU.
static void iree_thread_make_cpu_set_all(cpu_set_t* out_set) {
  uint32_t cpu = 0;
  while (cpu < CPU_SETSIZE) {
    CPU_SET(cpu, out_set);
    ++cpu;
  }
}
307+
308+
#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
309+
310+
// Sets CPU bits associated with the given NUMA node ID.
311+
// If the platform query fails then all CPU bits are set.
312+
static void iree_thread_make_cpu_set_from_node_id(uint32_t node_id,
313+
cpu_set_t* out_set) {
314+
// e.g. /sys/devices/system/node/node0/cpumap
315+
char cpumap_path[256];
316+
snprintf(cpumap_path, sizeof(cpumap_path),
317+
"/sys/devices/system/node/node%u/cpumap", node_id);
318+
319+
// Open file for reading. This should succeed under hypervisors/lockdown.
320+
FILE* file = fopen(cpumap_path, "r");
321+
if (!file) {
322+
// Permission denied or not found (not a conformant Linux kernel).
323+
iree_thread_make_cpu_set_all(out_set);
324+
return;
325+
}
326+
327+
// Read the entire file to EOF and get the cpumap line.
328+
// After trimming we expect |line| to be something like:
329+
// 'ffffffff,ffffffff,ffffffff,00000000,00000000,00000000'
330+
char line_buffer[512];
331+
const size_t read_length = fread(line_buffer, 1, sizeof(line_buffer), file);
332+
if (ferror(file)) {
333+
// Read should never fail, but may if the CPU set grows to thousands. We'd
334+
// probably want to then query the file length and allocate a heap buffer.
335+
// For now all systems we can observe easily fit into our stack buffer.
336+
iree_thread_make_cpu_set_all(out_set);
337+
return;
338+
}
339+
iree_string_view_t line =
340+
iree_string_view_trim(iree_make_string_view(line_buffer, read_length));
341+
342+
// Parse each comma-delimited segment. Segments are a base-16 encoded uint32_t
343+
// value. Each segment contains 32 CPU bits and we track the current index
344+
// as we walk them to get the absolute cpu_set_t index.
345+
intptr_t split_index = 0;
346+
iree_host_size_t cpu_index = 0;
347+
do {
348+
iree_string_view_t segment_str;
349+
split_index = iree_string_view_split(line, ',', &segment_str, &line);
350+
uint32_t segment = 0;
351+
if (!iree_string_view_atoi_uint32_base(segment_str, 16, &segment)) {
352+
// Failed to parse segment as an integer.
353+
iree_thread_make_cpu_set_all(out_set);
354+
return;
355+
}
356+
for (iree_host_size_t i = 0; i < 32; ++i) {
357+
if (segment & (1ull << i)) {
358+
CPU_SET(cpu_index + i, out_set);
359+
}
360+
}
361+
cpu_index += 32;
362+
} while (split_index != -1);
363+
364+
fclose(file);
365+
}
366+
367+
#else
368+
369+
// Fallback for platforms without a NUMA node query: |node_id| is ignored and
// every CPU bit is set in |out_set|. BSD may have some equivalent to the Linux
// cpumap that could be used here.
static void iree_thread_make_cpu_set_from_node_id(uint32_t node_id,
                                                  cpu_set_t* out_set) {
  (void)node_id;
  iree_thread_make_cpu_set_all(out_set);
}
375+
376+
#endif  // IREE_PLATFORM_ANDROID || IREE_PLATFORM_LINUX
377+
378+
static void iree_thread_make_cpu_set_from_affinity(
379+
iree_thread_affinity_t affinity, cpu_set_t* out_set) {
380+
CPU_ZERO(out_set);
381+
382+
// Assign to any processor in the group.
383+
if (affinity.group_any) {
384+
iree_thread_make_cpu_set_from_node_id(affinity.group, out_set);
385+
return;
386+
}
387+
388+
// Specific processors can be set directly and optionally we also set its
389+
// paired SMT processor. Note that we don't check whether SMT is enabled and
390+
// assume the smt field is only assigned if it is.
391+
if (affinity.id_assigned) {
392+
CPU_SET(affinity.id, out_set);
393+
if (affinity.smt) {
394+
CPU_SET(affinity.id + 1, out_set);
395+
}
396+
return;
397+
}
398+
399+
// No specific affinity specified; use any CPU.
400+
iree_thread_make_cpu_set_all(out_set);
401+
}
402+
300403
void iree_thread_request_affinity(iree_thread_t* thread,
301404
iree_thread_affinity_t affinity) {
302-
if (!affinity.specified) return;
303405
IREE_TRACE_ZONE_BEGIN(z0);
406+
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
407+
char affinity_desc[64];
408+
int affinity_desc_length =
409+
snprintf(affinity_desc, IREE_ARRAYSIZE(affinity_desc),
410+
"group_any=%u, group=%u, id_assigned=%u, id=%u, smt=%u",
411+
affinity.group_any, affinity.group, affinity.id_assigned,
412+
affinity.id, affinity.smt);
413+
IREE_TRACE_ZONE_APPEND_TEXT(z0, affinity_desc, affinity_desc_length);
414+
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
304415

305416
cpu_set_t cpu_set;
306-
CPU_ZERO(&cpu_set);
307-
CPU_SET(affinity.id, &cpu_set);
308-
if (affinity.smt) {
309-
CPU_SET(affinity.id + 1, &cpu_set);
310-
}
417+
iree_thread_make_cpu_set_from_affinity(affinity, &cpu_set);
311418

312419
#if defined(IREE_PLATFORM_ANDROID)
313420
// `pthread_gettid_np` is only available on API 21+ and it is needed to set

runtime/src/iree/base/internal/threading_win32.c

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
175175
if (params.priority_class != IREE_THREAD_PRIORITY_CLASS_NORMAL) {
176176
iree_thread_set_priority_class(thread, params.priority_class);
177177
}
178-
if (params.initial_affinity.specified) {
178+
if (!iree_thread_affinity_is_unspecified(params.initial_affinity)) {
179179
iree_thread_request_affinity(thread, params.initial_affinity);
180180
}
181181

@@ -262,24 +262,41 @@ void iree_thread_override_end(iree_thread_override_t* override) {
262262

263263
void iree_thread_request_affinity(iree_thread_t* thread,
264264
iree_thread_affinity_t affinity) {
265-
if (!affinity.specified) return;
266265
IREE_TRACE_ZONE_BEGIN(z0);
267266
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
268-
char affinity_desc[32];
269-
int affinity_desc_length = snprintf(
270-
affinity_desc, IREE_ARRAYSIZE(affinity_desc), "group=%d, id=%d, smt=%d",
271-
affinity.group, affinity.id, affinity.smt);
267+
char affinity_desc[64];
268+
int affinity_desc_length =
269+
snprintf(affinity_desc, IREE_ARRAYSIZE(affinity_desc),
270+
"group_any=%u, group=%u, id_assigned=%u, id=%u, smt=%u",
271+
affinity.group_any, affinity.group, affinity.id_assigned,
272+
affinity.id, affinity.smt);
272273
IREE_TRACE_ZONE_APPEND_TEXT(z0, affinity_desc, affinity_desc_length);
273274
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
274275

276+
// TODO(benvanik): switch to the Windows 11 APIs when available (dynamically)
277+
// for specifying groups with more than 64 processors. Prior to the new APIs
278+
// each group was limited to 64 logical processors and that resulted in groups
279+
// being sharded. We need to update our task topology code (which is the
280+
// primary caller of this function) as well as others to assign the newer
281+
// group IDs and this code to do the same.
282+
//
283+
// See:
284+
// https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
285+
// KeQueryNodeActiveAffinity2
286+
// (probably SetThreadSelectedCpuSets?)
287+
275288
GROUP_AFFINITY group_affinity;
276289
memset(&group_affinity, 0, sizeof(group_affinity));
277290
group_affinity.Group = affinity.group;
278-
KAFFINITY affinity_mask = 1ull << affinity.id;
279-
if (affinity.smt) {
280-
affinity_mask |= 1ull << (affinity.id + 1);
291+
if (affinity.group_any) {
292+
group_affinity.Mask = (KAFFINITY)UINTPTR_MAX;
293+
} else {
294+
KAFFINITY affinity_mask = 1ull << affinity.id;
295+
if (affinity.smt) {
296+
affinity_mask |= 1ull << (affinity.id + 1);
297+
}
298+
group_affinity.Mask = affinity_mask;
281299
}
282-
group_affinity.Mask = affinity_mask;
283300
SetThreadGroupAffinity(thread->handle, &group_affinity, NULL);
284301

285302
// TODO(benvanik): figure out if this is a bad thing; sometimes it can result

runtime/src/iree/task/api.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,9 @@ static void iree_task_flags_dump_task_topology(
234234
fprintf(stdout, "# group[%d]: '%s'\n", group->group_index, group->name);
235235
fprintf(stdout, "# processor: %u\n", group->processor_index);
236236
fprintf(stdout, "# affinity: ");
237-
if (group->ideal_thread_affinity.specified) {
237+
if (group->ideal_thread_affinity.group_any) {
238+
fprintf(stdout, "group=%u (any)", group->ideal_thread_affinity.group);
239+
} else if (group->ideal_thread_affinity.id_assigned) {
238240
fprintf(
239241
stdout, "group=%u, id=%u, smt=%u", group->ideal_thread_affinity.group,
240242
group->ideal_thread_affinity.id, group->ideal_thread_affinity.smt);

runtime/src/iree/task/topology_cpuinfo.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ iree_status_t iree_task_topology_initialize_from_logical_cpu_set(
7474
// really used on Linux today anyway.
7575
iree_thread_affinity_t* affinity = &group->ideal_thread_affinity;
7676
memset(affinity, 0, sizeof(*affinity));
77-
affinity->specified = 1;
77+
affinity->id_assigned = 1;
7878
affinity->id = cpu_ids[i];
7979
}
8080

@@ -143,12 +143,6 @@ static void iree_task_topology_set_affinity_from_processor(
143143
const struct cpuinfo_processor* processor,
144144
iree_thread_affinity_t* out_affinity) {
145145
memset(out_affinity, 0, sizeof(*out_affinity));
146-
out_affinity->specified = 1;
147-
148-
// Special bit to indicate that (if required) we want the entire core.
149-
if (processor->core->processor_count > 1) {
150-
out_affinity->smt = 1;
151-
}
152146

153147
// cpuinfo #ifdefs the fields we need to extract the right platform IDs.
154148
// We purposefully use the same exact macros they do there so that we don't
@@ -161,14 +155,21 @@ static void iree_task_topology_set_affinity_from_processor(
161155
// the kernel to distribute the threads so the exact bits don't matter as long
162156
// as they are unique per group we want isolated.
163157
out_affinity->group = processor->cluster->cluster_id;
158+
out_affinity->id_assigned = 1;
164159
out_affinity->id = (uint32_t)(uintptr_t)processor;
165160
#elif defined(__linux__)
166161
out_affinity->group = processor->cluster->cluster_id;
162+
out_affinity->id_assigned = 1;
167163
out_affinity->id = processor->linux_id;
168164
#else
169165
// WASM? Unused today.
170-
out_affinity->specified = 0;
166+
out_affinity->id_assigned = 0;
171167
#endif // cpuinfo-like platform field
168+
169+
// Special bit to indicate that (if required) we want the entire core.
170+
if (processor->core->processor_count > 1) {
171+
out_affinity->smt = 1;
172+
}
172173
}
173174

174175
// Populates |out_group| with the information from |processor|.

runtime/src/iree/task/topology_darwin.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ iree_status_t iree_task_topology_initialize_from_physical_cores(
213213
// affinity info. Note that we pack "use efficiency cores only" into the SMT
214214
// bit and use that to force a QoS level that ensures only efficiency cores
215215
// are used when present. Probably.
216-
group->ideal_thread_affinity.specified = 1;
217216
group->ideal_thread_affinity.group = (uint32_t)node_id;
217+
group->ideal_thread_affinity.id_assigned = 1;
218218
group->ideal_thread_affinity.id = i;
219219
switch (performance_level) {
220220
default:

0 commit comments

Comments
 (0)