Skip to content

Commit 5d62d78

Browse files
cborinternal_p.h: Fix AVX2 build with MSVC
MSVC (and, I think, ICC too) lacks the simpler scalar intrinsics (_cvtss_sh and _cvtsh_ss) for converting from single precision to half precision and back, so we need to use the packed-data intrinsics instead. Fixes #192. Signed-off-by: Thiago Macieira <[email protected]>
1 parent 369959a commit 5d62d78

File tree

2 files changed

+9
-7
lines changed

2 files changed

+9
-7
lines changed

.appveyor.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ install:
1313
1414
if /i "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2017" (call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64) & (set QTDIR=C:\Qt\5.12\msvc2017_64)
1515
16-
if /i "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2019" (call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64) & (set QTDIR=C:\Qt\5.15\msvc2019_64)
16+
if /i "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2019" (call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64) & (set QTDIR=C:\Qt\5.15\msvc2019_64) & set CFLAGS=/arch:AVX2
1717
1818
set path=%PATH%;%QTDIR%\bin
1919
build_script:
2020
- cmd: >-
21-
nmake -f Makefile.nmake -nologo CFLAGS="-W3 -Os -MDd"
21+
nmake -f Makefile.nmake -nologo CFLAGS="%CFLAGS% -W3 -Os -MDd"
2222
2323
cd tests
2424

src/cborinternal_p.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,17 @@
3737
#endif
3838

3939
#ifndef CBOR_NO_HALF_FLOAT_TYPE
40-
# ifdef __F16C__
40+
# if defined(__F16C__) || defined(__AVX2__)
4141
# include <immintrin.h>
42-
static inline unsigned short encode_half(double val)
42+
static inline unsigned short encode_half(float val)
4343
{
44-
return _cvtss_sh((float)val, 3);
44+
__m128i m = _mm_cvtps_ph(_mm_set_ss(val), _MM_FROUND_CUR_DIRECTION);
45+
return _mm_extract_epi16(m, 0);
4546
}
46-
static inline double decode_half(unsigned short half)
47+
static inline float decode_half(unsigned short half)
4748
{
48-
return _cvtsh_ss(half);
49+
__m128i m = _mm_cvtsi32_si128(half);
50+
return _mm_cvtss_f32(_mm_cvtph_ps(m));
4951
}
5052
# else
5153
/* software implementation of float-to-fp16 conversions */

0 commit comments

Comments
 (0)