@@ -179,7 +179,7 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
179179 if (params .priority_class != IREE_THREAD_PRIORITY_CLASS_NORMAL ) {
180180 iree_thread_set_priority_class (thread , params .priority_class );
181181 }
182- if (params .initial_affinity . specified ) {
182+ if (! iree_thread_affinity_is_unspecified ( params .initial_affinity ) ) {
183183 iree_thread_request_affinity (thread , params .initial_affinity );
184184 }
185185
@@ -297,17 +297,124 @@ void iree_thread_override_end(iree_thread_override_t* override) {
297297 IREE_TRACE_ZONE_END (z0 );
298298}
299299
// Marks every CPU index in [0, CPU_SETSIZE) as allowed in |out_set|.
// A fully populated set lets the platform place the thread on any CPU.
static void iree_thread_make_cpu_set_all(cpu_set_t* out_set) {
  uint32_t cpu = 0;
  while (cpu < CPU_SETSIZE) {
    CPU_SET(cpu, out_set);
    ++cpu;
  }
}
307+
308+ #if defined(IREE_PLATFORM_ANDROID ) || defined(IREE_PLATFORM_LINUX )
309+
310+ // Sets CPU bits associated with the given NUMA node ID.
311+ // If the platform query fails then all CPU bits are set.
312+ static void iree_thread_make_cpu_set_from_node_id (uint32_t node_id ,
313+ cpu_set_t * out_set ) {
314+ // e.g. /sys/devices/system/node/node0/cpumap
315+ char cpumap_path [256 ];
316+ snprintf (cpumap_path , sizeof (cpumap_path ),
317+ "/sys/devices/system/node/node%u/cpumap" , node_id );
318+
319+ // Open file for reading. This should succeed under hypervisors/lockdown.
320+ FILE * file = fopen (cpumap_path , "r" );
321+ if (!file ) {
322+ // Permission denied or not found (not a conformant Linux kernel).
323+ iree_thread_make_cpu_set_all (out_set );
324+ return ;
325+ }
326+
327+ // Read the entire file to EOF and get the cpumap line.
328+ // After trimming we expect |line| to be something like:
329+ // 'ffffffff,ffffffff,ffffffff,00000000,00000000,00000000'
330+ char line_buffer [512 ];
331+ const size_t read_length = fread (line_buffer , 1 , sizeof (line_buffer ), file );
332+ if (ferror (file )) {
333+ // Read should never fail, but may if the CPU set grows to thousands. We'd
334+ // probably want to then query the file length and allocate a heap buffer.
335+ // For now all systems we can observe easily fit into our stack buffer.
336+ iree_thread_make_cpu_set_all (out_set );
337+ return ;
338+ }
339+ iree_string_view_t line =
340+ iree_string_view_trim (iree_make_string_view (line_buffer , read_length ));
341+
342+ // Parse each comma-delimited segment. Segments are a base-16 encoded uint32_t
343+ // value. Each segment contains 32 CPU bits and we track the current index
344+ // as we walk them to get the absolute cpu_set_t index.
345+ intptr_t split_index = 0 ;
346+ iree_host_size_t cpu_index = 0 ;
347+ do {
348+ iree_string_view_t segment_str ;
349+ split_index = iree_string_view_split (line , ',' , & segment_str , & line );
350+ uint32_t segment = 0 ;
351+ if (!iree_string_view_atoi_uint32_base (segment_str , 16 , & segment )) {
352+ // Failed to parse segment as an integer.
353+ iree_thread_make_cpu_set_all (out_set );
354+ return ;
355+ }
356+ for (iree_host_size_t i = 0 ; i < 32 ; ++ i ) {
357+ if (segment & (1ull << i )) {
358+ CPU_SET (cpu_index + i , out_set );
359+ }
360+ }
361+ cpu_index += 32 ;
362+ } while (split_index != -1 );
363+
364+ fclose (file );
365+ }
366+
367+ #else
368+
// Fallback used when no per-node CPU query is implemented for the platform:
// every CPU bit is set regardless of |node_id|. BSD may have some equivalent
// to the Linux cpumap we could use.
static void iree_thread_make_cpu_set_from_node_id(uint32_t node_id,
                                                  cpu_set_t* out_set) {
  (void)node_id;  // Intentionally ignored; there is nothing to look up.
  iree_thread_make_cpu_set_all(out_set);
}
375+
376+ #endif // IREE_PLATFORM_ANDROID || IREE_PLATFORM_LINUX
377+
378+ static void iree_thread_make_cpu_set_from_affinity (
379+ iree_thread_affinity_t affinity , cpu_set_t * out_set ) {
380+ CPU_ZERO (out_set );
381+
382+ // Assign to any processor in the group.
383+ if (affinity .group_any ) {
384+ iree_thread_make_cpu_set_from_node_id (affinity .group , out_set );
385+ return ;
386+ }
387+
388+ // Specific processors can be set directly and optionally we also set its
389+ // paired SMT processor. Note that we don't check whether SMT is enabled and
390+ // assume the smt field is only assigned if it is.
391+ if (affinity .id_assigned ) {
392+ CPU_SET (affinity .id , out_set );
393+ if (affinity .smt ) {
394+ CPU_SET (affinity .id + 1 , out_set );
395+ }
396+ return ;
397+ }
398+
399+ // No specific affinity specified; use any CPU.
400+ iree_thread_make_cpu_set_all (out_set );
401+ }
402+
300403void iree_thread_request_affinity (iree_thread_t * thread ,
301404 iree_thread_affinity_t affinity ) {
302- if (!affinity .specified ) return ;
303405 IREE_TRACE_ZONE_BEGIN (z0 );
406+ #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
407+ char affinity_desc [64 ];
408+ int affinity_desc_length =
409+ snprintf (affinity_desc , IREE_ARRAYSIZE (affinity_desc ),
410+ "group_any=%u, group=%u, id_assigned=%u, id=%u, smt=%u" ,
411+ affinity .group_any , affinity .group , affinity .id_assigned ,
412+ affinity .id , affinity .smt );
413+ IREE_TRACE_ZONE_APPEND_TEXT (z0 , affinity_desc , affinity_desc_length );
414+ #endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
304415
305416 cpu_set_t cpu_set ;
306- CPU_ZERO (& cpu_set );
307- CPU_SET (affinity .id , & cpu_set );
308- if (affinity .smt ) {
309- CPU_SET (affinity .id + 1 , & cpu_set );
310- }
417+ iree_thread_make_cpu_set_from_affinity (affinity , & cpu_set );
311418
312419#if defined(IREE_PLATFORM_ANDROID )
313420 // `pthread_gettid_np` is only available on API 21+ and it is needed to set
0 commit comments