Skip to content

Commit 3660d02

Browse files
committed
Detect presence of half precision conversion instructions (X86 only)
Several changes: 1) Add runtime/libpgmath/lib/x86_64/x86id.c and x86id.h 2) Add "X86IDFN(is_f16c)()" and "X86IDFN(is_f16c_cached)" to cpuid/common/x86id.c 3) Add "is_f16c" to runtime/libpgmath/lib/x86_64/cpuid8664.h 4) runtime/libpgmath/lib/x86_64/x86id.h needs to export the correct function name and define global variable X86IDFN(is_f16c_cached) 5) rte/pgc/port/src/dispatch.c: Now that x86id.c has been moved into libpgmath, change dispatch.c from using a subset of the CPUID "is_<FEATURE>" routines defined as local (static) in header file "cpuid8664.h" to using the functions defined in header file "x86id.h".
1 parent 87c7238 commit 3660d02

File tree

7 files changed

+2430
-81
lines changed

7 files changed

+2430
-81
lines changed

runtime/libpgmath/lib/common/dispatch.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
#include "mth_tbldefs.h"
8787

8888
#if defined(TARGET_LINUX_X8664) || defined(TARGET_OSX_X8664) || defined(TARGET_WIN_X8664)
89-
#include "cpuid8664.h"
89+
#include "x86id.h"
9090
#endif
9191

9292
/*
@@ -1026,25 +1026,25 @@ __math_dispatch()
10261026

10271027
} else { /* Get processor architecture using CPUID information */
10281028
#if defined(TARGET_LINUX_X8664) || defined(TARGET_OSX_X8664) || defined(TARGET_WIN_X8664)
1029-
if (CPUIDX8664(is_avx512vl)() == 1) {
1029+
if (X86IDFN(is_avx512vl)() == 1) {
10301030
__math_target = arch_avx512;
1031-
} else if (CPUIDX8664(is_avx512f)() == 1) {
1031+
} else if (X86IDFN(is_avx512f)() == 1) {
10321032
__math_target = arch_avx512knl;
1033-
} else if (CPUIDX8664(is_avx2)() == 1) {
1033+
} else if (X86IDFN(is_avx2)() == 1) {
10341034
__math_target = arch_avx2;
1035-
} else if (CPUIDX8664(is_avx)() == 1) {
1036-
if (CPUIDX8664(is_intel)() == 1) {
1035+
} else if (X86IDFN(is_avx)() == 1) {
1036+
if (X86IDFN(is_intel)() == 1) {
10371037
__math_target = arch_avx;
10381038
}
1039-
if (CPUIDX8664(is_amd)() == 1) {
1040-
if (CPUIDX8664(is_fma4)() == 1) {
1039+
if (X86IDFN(is_amd)() == 1) {
1040+
if (X86IDFN(is_fma4)() == 1) {
10411041
__math_target = arch_avxfma4;
10421042
} else {
10431043
__math_target = arch_sse4;
10441044
}
10451045
}
10461046
} else {
1047-
if ((CPUIDX8664(is_sse4a)() == 1) || (CPUIDX8664(is_sse41)() == 1)) {
1047+
if ((X86IDFN(is_sse4a)() == 1) || (X86IDFN(is_sse41)() == 1)) {
10481048
__math_target = arch_sse4;
10491049
} else {
10501050
__math_target = arch_em64t;

runtime/libpgmath/lib/x86_64/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,16 @@ set(SRCS
5858
dsqrt.c
5959
fabs.c
6060
sqrt.c
61+
pgcpuid.c
6162
${ASM_SRCS})
6263
libmath_add_object_library("${SRCS}" "${FLAGS}" "${DEFINITIONS}" "")
6364

65+
# Decorate entry points and global objects in x86id with an internal prefix.
66+
set(SRCS
67+
x86id.c)
68+
list(APPEND DEFINITIONS_FOR_LIBPGC ${DEFINITIONS} FOR_LIBPGC)
69+
libmath_add_object_library("${SRCS}" "${FLAGS}" "${DEFINITIONS_FOR_LIBPGC}" "for_libpgc")
70+
6471
# isoc99
6572
set(SRCS
6673
alog.c

runtime/libpgmath/lib/x86_64/cpuid8664.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ static int CPUIDX8664(is_amd)();
6161
static int CPUIDX8664(is_fma4)();
6262
static int CPUIDX8664(is_sse4a)();
6363
static int CPUIDX8664(is_sse41)();
64+
static int CPUIDX8664(is_f16c)();
6465

6566
/*
6667
* Check that this is a Genuine Intel processor
@@ -296,6 +297,30 @@ CPUIDX8664(is_avx512vl)(void)
296297
return (ebx & bit_AVX512VL) != 0;
297298
}/* is_avx512vl */
298299

300+
/*
301+
* Check that this is either a Genuine Intel or AMD processor that supports
302+
* f16c instructions.
303+
*/
304+
static int
305+
CPUIDX8664(is_f16c)(void)
306+
{
307+
uint32_t eax, ebx, ecx, edx;
308+
309+
if ((CPUIDX8664(is_intel)() == 0) && (CPUIDX8664(is_amd)() == 0)) {
310+
return 0;
311+
}
312+
313+
if (CPUIDX8664(is_avx)() == 0) {
314+
return 0;
315+
}
316+
317+
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
318+
return 0;
319+
}
320+
321+
return (ecx & bit_F16C) != 0;
322+
}/* is_f16c */
323+
299324
#ifdef UNIT_TEST
300325
int
301326
main()
@@ -309,6 +334,7 @@ main()
309334
printf("is_avx2()=%d\n", CPUIDX8664(is_avx2)());
310335
printf("is_avx512f()=%d\n", CPUIDX8664(is_avx512f)());
311336
printf("is_avx512vl()=%d\n", CPUIDX8664(is_avx512vl)());
337+
printf("is_f16c()=%d\n", CPUIDX8664(is_f16c)());
312338
}
313339
#endif
314340
#endif // #ifndef CPUIDX8664_H
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
*/
17+
18+
#include <stdint.h>
19+
#include "pgcpuid.h"
20+
21+
/*
22+
* Note:
23+
* 1) these functions cannot call any other function
24+
* 2) these functions can only use GPR (not floating point)
25+
*
26+
*/
27+
28+
/** @brief returns false/true if CPUID supports the requested function.
 *  __pgi_cpuid_getmax(uint32_t cpuid_func)
 *  @param cpuid_func (I1) function to execute CPUID with
 *
 *  CPUID is executed with %eax = (cpuid_func & 0x80000000), i.e. function 0
 *  for the basic range or function 0x80000000 for the extended range.  The
 *  value CPUID leaves in %eax is then compared against cpuid_func.
 *
 *  Returns false(0): cpuid_func is greater than the value returned in %eax
 *          true(1):  cpuid_func is less than or equal to that value
 *
 */

int
__pgi_cpuid_getmax(uint32_t f)
{
    uint32_t maxcpueax;
    /* Keep only the range-select bit: 0 (basic) or 0x80000000 (extended). */
    uint32_t fin = f & 0x80000000;

    /*
     * __asm__ (rather than asm) so the file also builds in ISO C modes;
     * __volatile__ so the compiler never merges or discards the CPUID.
     * %ebx, %ecx and %edx are overwritten by CPUID but not used here.
     */
    __asm__ __volatile__("\tcpuid"
                         : "=a"(maxcpueax)
                         : "0"(fin)
                         : "ebx", "ecx", "edx"
                         );
    return f <= maxcpueax;
}
48+
49+
/** @brief returns results of executing CPUID with function cpuid_func and
 *  sub function ecx.
 *  __pgi_cpuid_ecx(uint32_t cpuid_func, uint32_t *res, uint32_t ecx)
 *  @param cpuid_func (I1) function to execute CPUID with
 *  @param res (I2) pointer to buffer to store eax, ebx, ecx, edx
 *                  (four uint32_t elements are written)
 *  @param ecx (I3) value of %ecx to execute CPUID with
 *
 *  Returns false(0): if __pgi_cpuid_getmax() reports cpuid_func as not
 *                    supported; res is left untouched
 *          true(1):  CPUID successfully executed with cpuid_func+ecx and:
 *                    res[0]=%eax, res[1]=%ebx, res[2]=%ecx, res[3]=%edx
 *
 */

int
__pgi_cpuid_ecx(uint32_t f, uint32_t *r, uint32_t c)
{
    if (__pgi_cpuid_getmax(f) == 0) {
        return 0;
    }

    /*
     * __asm__ (rather than asm) so the file also builds in ISO C modes;
     * __volatile__ so that two calls with identical arguments are never
     * merged into one CPUID execution by the compiler.
     */
    __asm__ __volatile__("\tcpuid"
                         : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3])
                         : "0"(f), "2"(c)
                         :
                         );
    return 1;
}
73+
74+
75+
/** @brief execute CPUID for function cpuid_func with sub function 0.
 *  __pgi_cpuid(uint32_t cpuid_func, uint32_t *res)
 *  @param cpuid_func (I1) function to execute CPUID with
 *  @param res (I2) pointer to buffer to store eax, ebx, ecx, edx
 *
 *  Forwards to __pgi_cpuid_ecx() with %ecx = 0 and hands back its result:
 *  Returns false(0): if cpuid_func not supported
 *          true(1):  CPUID successfully executed with cpuid_func and:
 *                    res[0]=%eax, res[1]=%ebx, res[2]=%ecx, res[3]=%edx
 *
 */

int
__pgi_cpuid(uint32_t f, uint32_t *r)
{
    const uint32_t subfunc = 0;

    return __pgi_cpuid_ecx(f, r, subfunc);
}
91+
92+
/** @brief execute CPUID for function cpuid_func with sub function 0.
 *  __pgcpuid(uint32_t cpuid_func, uint32_t *res)
 *  @param cpuid_func (I1) function to execute CPUID with
 *  @param res (I2) pointer to buffer to store eax, ebx, ecx, edx
 *
 *  Same behavior as __pgi_cpuid(), provided under a second name; the work
 *  is done by __pgi_cpuid_ecx() with %ecx = 0.
 *
 *  Returns false(0): if cpuid_func not supported
 *          true(1):  CPUID successfully executed with cpuid_func and:
 *                    res[0]=%eax, res[1]=%ebx, res[2]=%ecx, res[3]=%edx
 *
 */

int
__pgcpuid(uint32_t f, uint32_t *r)
{
    int supported;

    supported = __pgi_cpuid_ecx(f, r, 0);
    return supported;
}
108+
109+
/** @brief read extended control register.
 *  __pgi_getbv(uint32_t xcr_num, uint64_t *xcr_res)
 *  @param xcr_num (I1) extended control register number to read
 *  @param xcr_res (I2) pointer to buffer to store xcr[xcr_num]
 *
 *  Returns true(1) with:
 *          xcr_res[31: 0]=%eax
 *          xcr_res[63:32]=%edx
 *
 *  No check is made here that the instruction is available; the function
 *  always executes it and always returns 1.
 *
 */
int
__pgi_getbv(uint32_t f, uint64_t *r)
{
    uint32_t lo;
    uint32_t hi;

    /*
     * __asm__ (rather than asm) so the file also builds in ISO C modes;
     * __volatile__ so the register read is never merged or discarded.
     */
    __asm__ __volatile__(
#if defined(__WIN64)
        /* Instruction emitted as raw bytes instead of by mnemonic. */
        "\t.byte\t0x0f, 0x01, 0xd0"
#else
        "\txgetbv"
#endif
        : "=a"(lo), "=d"(hi)
        : "c"(f)
        :
        );

    /*
     * Assemble the 64-bit result arithmetically rather than storing through
     * a uint32_t alias of *r, which would break the strict aliasing rules.
     */
    *r = ((uint64_t)hi << 32) | (uint64_t)lo;
    return 1;
}

0 commit comments

Comments
 (0)