Skip to content

Commit 17c16f2

Browse files
authored
Implement builtin_cpu_is and limit cpu choices to P8 and P9 for NVIDIA compilers
1 parent 91c3f86 commit 17c16f2

File tree

1 file changed

+151
-0
lines changed

1 file changed

+151
-0
lines changed

driver/others/dynamic_power.c

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ static char *corename[] = {
2727
#define NUM_CORETYPES 4
2828

2929
char *gotoblas_corename(void) {
30+
#ifndef C_PGI
3031
if (gotoblas == &gotoblas_POWER6) return corename[1];
32+
#endif
3133
if (gotoblas == &gotoblas_POWER8) return corename[2];
3234
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
3335
if (gotoblas == &gotoblas_POWER9) return corename[3];
@@ -38,10 +40,157 @@ char *gotoblas_corename(void) {
3840
return corename[0];
3941
}
4042

43+
#ifdef C_PGI
44+
/*
45+
* NV HPC compilers do not yet implement __builtin_cpu_is().
46+
* Fake a version here for use in the CPU detection code below.
47+
*
48+
* Strategy here is to first check the CPU to see what it actually is,
49+
* and then test the input to see if what the CPU actually is matches
50+
* what was requested.
51+
*/
52+
53+
#include <string.h>
54+
55+
/*
56+
* Define POWER processor version table.
57+
*
58+
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
59+
*/
60+
61+
/* CPU family identifiers stored in the cpu_type field of pvrPOWER[]. */
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10

/*
 * Processor Version Register (PVR) lookup table.
 *
 * An entry applies to a PVR when (pvr & pvr_mask) == pvr_value.  The table
 * is searched from the top and the first hit wins, so entries with a more
 * specific mask (e.g. the POWER9 DD2.0 / DD2.1 rows) must stay ahead of the
 * generic row for the same family.
 *
 * The last row has pvr_mask == 0 and pvr_value == 0; it therefore matches
 * every PVR and acts as the "Unknown" catch-all.  It must remain last.
 *
 * The table is read-only, hence const.
 *
 * NOTE(review): POWER7 and POWER7+ are tagged CPU_POWER6 below -- presumably
 * on purpose; confirm before changing.
 */
static const struct {
  uint32_t pvr_mask;     /* bits of the PVR that take part in the compare */
  uint32_t pvr_value;    /* expected value of the masked PVR              */
  const char *cpu_name;  /* human-readable name, used for debug output    */
  uint32_t cpu_type;     /* one of the CPU_* identifiers above            */
} pvrPOWER[] = {

  { /* POWER6 in P5+ mode; 2.04-compliant processor */
    .pvr_mask  = 0xffffffff,
    .pvr_value = 0x0f000001,
    .cpu_name  = "POWER5+",
    .cpu_type  = CPU_POWER5,
  },

  { /* Power6 aka POWER6X */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x003e0000,
    .cpu_name  = "POWER6 (raw)",
    .cpu_type  = CPU_POWER6,
  },

  { /* Power7 */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x003f0000,
    .cpu_name  = "POWER7 (raw)",
    .cpu_type  = CPU_POWER6,
  },

  { /* Power7+ */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x004A0000,
    .cpu_name  = "POWER7+ (raw)",
    .cpu_type  = CPU_POWER6,
  },

  { /* Power8E */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x004b0000,
    .cpu_name  = "POWER8E (raw)",
    .cpu_type  = CPU_POWER8,
  },

  { /* Power8NVL */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x004c0000,
    .cpu_name  = "POWER8NVL (raw)",
    .cpu_type  = CPU_POWER8,
  },

  { /* Power8 */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x004d0000,
    .cpu_name  = "POWER8 (raw)",
    .cpu_type  = CPU_POWER8,
  },

  { /* Power9 DD2.0 */
    .pvr_mask  = 0xffffefff,
    .pvr_value = 0x004e0200,
    .cpu_name  = "POWER9 (raw)",
    .cpu_type  = CPU_POWER9,
  },

  { /* Power9 DD 2.1 */
    .pvr_mask  = 0xffffefff,
    .pvr_value = 0x004e0201,
    .cpu_name  = "POWER9 (raw)",
    .cpu_type  = CPU_POWER9,
  },

  { /* Power9 DD2.2 or later */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x004e0000,
    .cpu_name  = "POWER9 (raw)",
    .cpu_type  = CPU_POWER9,
  },

  { /* Power10 */
    .pvr_mask  = 0xffff0000,
    .pvr_value = 0x00800000,
    .cpu_name  = "POWER10 (raw)",
    .cpu_type  = CPU_POWER10,
  },

  { /* End of table, pvr_mask and pvr_value must be zero */
    .pvr_mask  = 0x0,
    .pvr_value = 0x0,
    .cpu_name  = "Unknown",
    .cpu_type  = CPU_UNKNOWN,
  },
};
159+
160+
static int __builtin_cpu_is(const char *cpu) {
161+
int i;
162+
uint32_t pvr;
163+
uint32_t cpu_type;
164+
165+
asm("mfpvr %0" : "=r"(pvr));
166+
167+
for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
168+
if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
169+
break;
170+
}
171+
}
172+
173+
#if defined(DEBUG)
174+
printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
175+
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
176+
#endif
177+
cpu_type = pvrPOWER[i].cpu_type;
178+
179+
if (!strcmp(cpu, "power8"))
180+
return cpu_type == CPU_POWER8;
181+
if (!strcmp(cpu, "power9"))
182+
return cpu_type == CPU_POWER9;
183+
return 0;
184+
}
185+
186+
#endif /* C_PGI */
187+
41188
static gotoblas_t *get_coretype(void) {
42189

190+
#ifndef C_PGI
43191
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
44192
return &gotoblas_POWER6;
193+
#endif
45194
if (__builtin_cpu_is("power8"))
46195
return &gotoblas_POWER8;
47196
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
@@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) {
77226

78227
switch (found)
79228
{
229+
#ifndef C_PGI
80230
case 1: return (&gotoblas_POWER6);
231+
#endif
81232
case 2: return (&gotoblas_POWER8);
82233
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
83234
case 3: return (&gotoblas_POWER9);

0 commit comments

Comments
 (0)