|
| 1 | +From 5fb4d7f6079a76b2907ccc8c53c7c509c30a3dca Mon Sep 17 00:00:00 2001 |
| 2 | +From: Nikita Popov < [email protected]> |
| 3 | +Date: Thu, 10 Oct 2024 12:47:33 +0000 |
| 4 | +Subject: [PATCH] [openmp] Use core_siblings_list if physical_package_id not |
| 5 | + available |
| 6 | + |
| 7 | +On powerpc, physical_package_id may not be available. Currently, |
| 8 | +this causes openmp to fall back to flat topology and various |
| 9 | +affinity tests fail. |
| 10 | + |
| 11 | +Fix this by parsing core_siblings_list to determine which cpus |
| 12 | +belong to the same socket. This matches what the testing code |
| 13 | +does. The code to parse the CPU list format thankfully already |
| 14 | +exists. |
| 15 | + |
| 16 | +Fixes https://github.com/llvm/llvm-project/issues/111809. |
| 17 | +--- |
| 18 | + openmp/runtime/src/kmp_affinity.cpp | 100 +++++++++++++------ |
| 19 | + openmp/runtime/test/affinity/kmp-hw-subset.c | 2 +- |
| 20 | + 2 files changed, 72 insertions(+), 30 deletions(-) |
| 21 | + |
| 22 | +diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp |
| 23 | +index cf5cad04eb57..c3d5ecf1345e 100644 |
| 24 | +--- a/openmp/runtime/src/kmp_affinity.cpp |
| 25 | ++++ b/openmp/runtime/src/kmp_affinity.cpp |
| 26 | +@@ -1589,15 +1589,13 @@ kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, |
| 27 | + return buf; |
| 28 | + } |
| 29 | + |
| 30 | +-// Return (possibly empty) affinity mask representing the offline CPUs |
| 31 | +-// Caller must free the mask |
| 32 | +-kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() { |
| 33 | +- kmp_affin_mask_t *offline; |
| 34 | +- KMP_CPU_ALLOC(offline); |
| 35 | +- KMP_CPU_ZERO(offline); |
| 36 | ++static kmp_affin_mask_t *__kmp_parse_cpu_list(const char *path) { |
| 37 | ++ kmp_affin_mask_t *mask; |
| 38 | ++ KMP_CPU_ALLOC(mask); |
| 39 | ++ KMP_CPU_ZERO(mask); |
| 40 | + #if KMP_OS_LINUX |
| 41 | + int n, begin_cpu, end_cpu; |
| 42 | +- kmp_safe_raii_file_t offline_file; |
| 43 | ++ kmp_safe_raii_file_t file; |
| 44 | + auto skip_ws = [](FILE *f) { |
| 45 | + int c; |
| 46 | + do { |
| 47 | +@@ -1606,29 +1604,29 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() { |
| 48 | + if (c != EOF) |
| 49 | + ungetc(c, f); |
| 50 | + }; |
| 51 | +- // File contains CSV of integer ranges representing the offline CPUs |
| 52 | ++ // File contains CSV of integer ranges representing the CPUs |
| 53 | + // e.g., 1,2,4-7,9,11-15 |
| 54 | +- int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r"); |
| 55 | ++ int status = file.try_open(path, "r"); |
| 56 | + if (status != 0) |
| 57 | +- return offline; |
| 58 | +- while (!feof(offline_file)) { |
| 59 | +- skip_ws(offline_file); |
| 60 | +- n = fscanf(offline_file, "%d", &begin_cpu); |
| 61 | ++ return mask; |
| 62 | ++ while (!feof(file)) { |
| 63 | ++ skip_ws(file); |
| 64 | ++ n = fscanf(file, "%d", &begin_cpu); |
| 65 | + if (n != 1) |
| 66 | + break; |
| 67 | +- skip_ws(offline_file); |
| 68 | +- int c = fgetc(offline_file); |
| 69 | ++ skip_ws(file); |
| 70 | ++ int c = fgetc(file); |
| 71 | + if (c == EOF || c == ',') { |
| 72 | + // Just single CPU |
| 73 | + end_cpu = begin_cpu; |
| 74 | + } else if (c == '-') { |
| 75 | + // Range of CPUs |
| 76 | +- skip_ws(offline_file); |
| 77 | +- n = fscanf(offline_file, "%d", &end_cpu); |
| 78 | ++ skip_ws(file); |
| 79 | ++ n = fscanf(file, "%d", &end_cpu); |
| 80 | + if (n != 1) |
| 81 | + break; |
| 82 | +- skip_ws(offline_file); |
| 83 | +- c = fgetc(offline_file); // skip ',' |
| 84 | ++ skip_ws(file); |
| 85 | ++ c = fgetc(file); // skip ',' |
| 86 | + } else { |
| 87 | + // Syntax problem |
| 88 | + break; |
| 89 | +@@ -1638,13 +1636,19 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() { |
| 90 | + end_cpu >= __kmp_xproc || begin_cpu > end_cpu) { |
| 91 | + continue; |
| 92 | + } |
| 93 | +- // Insert [begin_cpu, end_cpu] into offline mask |
| 94 | ++ // Insert [begin_cpu, end_cpu] into mask |
| 95 | + for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) { |
| 96 | +- KMP_CPU_SET(cpu, offline); |
| 97 | ++ KMP_CPU_SET(cpu, mask); |
| 98 | + } |
| 99 | + } |
| 100 | + #endif |
| 101 | +- return offline; |
| 102 | ++ return mask; |
| 103 | ++} |
| 104 | ++ |
| 105 | ++// Return (possibly empty) affinity mask representing the offline CPUs |
| 106 | ++// Caller must free the mask |
| 107 | ++kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() { |
| 108 | ++ return __kmp_parse_cpu_list("/sys/devices/system/cpu/offline"); |
| 109 | + } |
| 110 | + |
| 111 | + // Return the number of available procs |
| 112 | +@@ -3175,6 +3179,37 @@ static inline const char *__kmp_cpuinfo_get_envvar() { |
| 113 | + return envvar; |
| 114 | + } |
| 115 | + |
| 116 | ++static bool __kmp_package_id_from_core_siblings_list(unsigned **threadInfo, |
| 117 | ++ unsigned num_avail, |
| 118 | ++ unsigned idx) { |
| 119 | ++ if (!KMP_AFFINITY_CAPABLE()) |
| 120 | ++ return false; |
| 121 | ++ |
| 122 | ++ char path[256]; |
| 123 | ++ KMP_SNPRINTF(path, sizeof(path), |
| 124 | ++ "/sys/devices/system/cpu/cpu%u/topology/core_siblings_list", |
| 125 | ++ threadInfo[idx][osIdIndex]); |
| 126 | ++ kmp_affin_mask_t *siblings = __kmp_parse_cpu_list(path); |
| 127 | ++ for (unsigned i = 0; i < num_avail; ++i) { |
| 128 | ++ unsigned cpu_id = threadInfo[i][osIdIndex]; |
| 129 | ++ KMP_ASSERT(cpu_id < __kmp_affin_mask_size * CHAR_BIT); |
| 130 | ++ if (!KMP_CPU_ISSET(cpu_id, siblings)) |
| 131 | ++ continue; |
| 132 | ++ if (threadInfo[i][pkgIdIndex] == UINT_MAX) { |
| 133 | ++ // Arbitrarily pick the first index we encounter, it only matters that |
| 134 | ++ // the value is the same for all siblings. |
| 135 | ++ threadInfo[i][pkgIdIndex] = idx; |
| 136 | ++ } else if (threadInfo[i][pkgIdIndex] != idx) { |
| 137 | ++ // Contradictory sibling lists. |
| 138 | ++ KMP_CPU_FREE(siblings); |
| 139 | ++ return false; |
| 140 | ++ } |
| 141 | ++ } |
| 142 | ++ KMP_ASSERT(threadInfo[idx][pkgIdIndex] != UINT_MAX); |
| 143 | ++ KMP_CPU_FREE(siblings); |
| 144 | ++ return true; |
| 145 | ++} |
| 146 | ++ |
| 147 | + // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the |
| 148 | + // affinity map. On AIX, the map is obtained through system SRAD (Scheduler |
| 149 | + // Resource Allocation Domain). |
| 150 | +@@ -3550,18 +3585,13 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, |
| 151 | + return false; |
| 152 | + } |
| 153 | + |
| 154 | +- // Check for missing fields. The osId field must be there, and we |
| 155 | +- // currently require that the physical id field is specified, also. |
| 156 | ++ // Check for missing fields. The osId field must be there. The physical |
| 157 | ++ // id field will be checked later. |
| 158 | + if (threadInfo[num_avail][osIdIndex] == UINT_MAX) { |
| 159 | + CLEANUP_THREAD_INFO; |
| 160 | + *msg_id = kmp_i18n_str_MissingProcField; |
| 161 | + return false; |
| 162 | + } |
| 163 | +- if (threadInfo[0][pkgIdIndex] == UINT_MAX) { |
| 164 | +- CLEANUP_THREAD_INFO; |
| 165 | +- *msg_id = kmp_i18n_str_MissingPhysicalIDField; |
| 166 | +- return false; |
| 167 | +- } |
| 168 | + |
| 169 | + // Skip this proc if it is not included in the machine model. |
| 170 | + if (KMP_AFFINITY_CAPABLE() && |
| 171 | +@@ -3591,6 +3621,18 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, |
| 172 | + } |
| 173 | + *line = 0; |
| 174 | + |
| 175 | ++ // At least on powerpc, Linux may return -1 for physical_package_id. Try |
| 176 | ++ // to reconstruct topology from core_siblings_list in that case. |
| 177 | ++ for (i = 0; i < num_avail; ++i) { |
| 178 | ++ if (threadInfo[i][pkgIdIndex] == UINT_MAX) { |
| 179 | ++ if (!__kmp_package_id_from_core_siblings_list(threadInfo, num_avail, i)) { |
| 180 | ++ CLEANUP_THREAD_INFO; |
| 181 | ++ *msg_id = kmp_i18n_str_MissingPhysicalIDField; |
| 182 | ++ return false; |
| 183 | ++ } |
| 184 | ++ } |
| 185 | ++ } |
| 186 | ++ |
| 187 | + #if KMP_MIC && REDUCE_TEAM_SIZE |
| 188 | + unsigned teamSize = 0; |
| 189 | + #endif // KMP_MIC && REDUCE_TEAM_SIZE |
| 190 | +diff --git a/openmp/runtime/test/affinity/kmp-hw-subset.c b/openmp/runtime/test/affinity/kmp-hw-subset.c |
| 191 | +index 606fcdfbada9..0b49969bd3b1 100644 |
| 192 | +--- a/openmp/runtime/test/affinity/kmp-hw-subset.c |
| 193 | ++++ b/openmp/runtime/test/affinity/kmp-hw-subset.c |
| 194 | +@@ -25,7 +25,7 @@ static int compare_hw_subset_places(const place_list_t *openmp_places, |
| 195 | + expected_per_place = nthreads_per_core; |
| 196 | + } else { |
| 197 | + expected_total = nsockets; |
| 198 | +- expected_per_place = ncores_per_socket; |
| 199 | ++ expected_per_place = ncores_per_socket * nthreads_per_core; |
| 200 | + } |
| 201 | + if (openmp_places->num_places != expected_total) { |
| 202 | + fprintf(stderr, "error: KMP_HW_SUBSET did not half each resource layer!\n"); |
| 203 | +-- |
| 204 | +2.47.0 |
| 205 | + |
0 commit comments