@@ -361,6 +361,9 @@ static void numa_mapping(void) {
361
361
unsigned long work , bit ;
362
362
int count = 0 ;
363
363
int bitmask_idx = 0 ;
364
+ int current_cpu ;
365
+ int current_node = 0 ;
366
+ int cpu_count = 0 ;
364
367
365
368
for (node = 0 ; node < common -> num_nodes ; node ++ ) {
366
369
core = 0 ;
@@ -382,41 +385,95 @@ static void numa_mapping(void) {
382
385
fprintf (stderr , "CPU (%2d) : %08lx\n" , cpu , common -> cpu_info [cpu ]);
383
386
#endif
384
387
385
- h = 1 ;
386
-
387
- while (h < count ) h = 2 * h + 1 ;
388
-
389
- while (h > 1 ) {
390
- h /= 2 ;
391
- for (i = h ; i < count ; i ++ ) {
392
- work = common -> cpu_info [i ];
393
- bit = CPU_ISSET (i , & cpu_orig_mask [0 ]);
394
- j = i - h ;
395
- while (work < common -> cpu_info [j ]) {
396
- common -> cpu_info [j + h ] = common -> cpu_info [j ];
397
- if (CPU_ISSET (j , & cpu_orig_mask [0 ])) {
398
- CPU_SET (j + h , & cpu_orig_mask [0 ]);
399
- } else {
400
- CPU_CLR (j + h , & cpu_orig_mask [0 ]);
401
- }
402
- j -= h ;
403
- if (j < 0 ) break ;
404
- }
405
- common -> cpu_info [j + h ] = work ;
406
- if (bit ) {
407
- CPU_SET (j + h , & cpu_orig_mask [0 ]);
408
- } else {
409
- CPU_CLR (j + h , & cpu_orig_mask [0 ]);
388
+ current_cpu = sched_getcpu ();
389
+ for (cpu = 0 ; cpu < count ; cpu ++ ) {
390
+ if (READ_CPU (common -> cpu_info [cpu ]) == current_cpu ) {
391
+ current_node = READ_NODE (common -> cpu_info [cpu ]);
392
+ break ;
393
+ }
394
+ }
395
+ for (i = 0 ; i < MAX_BITMASK_LEN ; i ++ )
396
+ cpu_count += popcount (common -> node_info [current_node ][i ] & common -> avail [i ]);
397
+
398
+ /*
399
+ * If all the processes can be accommodated in the
400
+ * current node itself, then bind to cores
401
+ * from the current node only
402
+ */
403
+ if (numprocs <= cpu_count ) {
404
+ /*
405
+ * First sort all the cores in order from the current node.
406
+ * Then take remaining nodes one by one in order,
407
+ * and sort their cores in order.
408
+ */
409
+ for (i = 0 ; i < count ; i ++ ) {
410
+ for (j = 0 ; j < count - 1 ; j ++ ) {
411
+ int node_1 , node_2 ;
412
+ int core_1 , core_2 ;
413
+ int swap = 0 ;
414
+
415
+ node_1 = READ_NODE (common -> cpu_info [j ]);
416
+ node_2 = READ_NODE (common -> cpu_info [j + 1 ]);
417
+ core_1 = READ_CORE (common -> cpu_info [j ]);
418
+ core_2 = READ_CORE (common -> cpu_info [j + 1 ]);
419
+
420
+ if (node_1 == node_2 ) {
421
+ if (core_1 > core_2 )
422
+ swap = 1 ;
423
+ } else {
424
+ if ((node_2 == current_node ) ||
425
+ ((node_1 != current_node ) && (node_1 > node_2 )))
426
+ swap = 1 ;
427
+ }
428
+ if (swap ) {
429
+ unsigned long temp ;
430
+
431
+ temp = common -> cpu_info [j ];
432
+ common -> cpu_info [j ] = common -> cpu_info [j + 1 ];
433
+ common -> cpu_info [j + 1 ] = temp ;
434
+ }
410
435
}
436
+ }
437
+ } else {
438
+ h = 1 ;
439
+
440
+ while (h < count ) h = 2 * h + 1 ;
441
+
442
+ while (h > 1 ) {
443
+ h /= 2 ;
444
+ for (i = h ; i < count ; i ++ ) {
445
+ work = common -> cpu_info [i ];
446
+ bit = CPU_ISSET (i , & cpu_orig_mask [0 ]);
447
+ j = i - h ;
448
+ while (work < common -> cpu_info [j ]) {
449
+ common -> cpu_info [j + h ] = common -> cpu_info [j ];
450
+ if (CPU_ISSET (j , & cpu_orig_mask [0 ])) {
451
+ CPU_SET (j + h , & cpu_orig_mask [0 ]);
452
+ } else {
453
+ CPU_CLR (j + h , & cpu_orig_mask [0 ]);
454
+ }
455
+ j -= h ;
456
+ if (j < 0 ) break ;
457
+ }
458
+ common -> cpu_info [j + h ] = work ;
459
+ if (bit ) {
460
+ CPU_SET (j + h , & cpu_orig_mask [0 ]);
461
+ } else {
462
+ CPU_CLR (j + h , & cpu_orig_mask [0 ]);
463
+ }
411
464
465
+ }
412
466
}
413
467
}
414
468
415
469
#ifdef DEBUG
416
470
fprintf (stderr , "\nSorting ...\n\n" );
417
471
418
472
for (cpu = 0 ; cpu < count ; cpu ++ )
419
- fprintf (stderr , "CPU (%2d) : %08lx\n" , cpu , common -> cpu_info [cpu ]);
473
+ fprintf (stderr , "CPUINFO (%2d) : %08lx (CPU=%3lu CORE=%3lu NODE=%3lu)\n" , cpu , common -> cpu_info [cpu ],
474
+ READ_CPU (common -> cpu_info [cpu ]),
475
+ READ_CORE (common -> cpu_info [cpu ]),
476
+ READ_NODE (common -> cpu_info [cpu ]));
420
477
#endif
421
478
422
479
}
0 commit comments