@@ -49,10 +49,10 @@ struct zmm_vector<float16> {
49
49
50
50
static opmask_t ge (reg_t x, reg_t y)
51
51
{
52
- reg_t sign_x = _mm512_and_si512 (x, _mm512_set1_epi16 (0x8000 ));
53
- reg_t sign_y = _mm512_and_si512 (y, _mm512_set1_epi16 (0x8000 ));
54
- reg_t exp_x = _mm512_and_si512 (x, _mm512_set1_epi16 (0x7c00 ));
55
- reg_t exp_y = _mm512_and_si512 (y, _mm512_set1_epi16 (0x7c00 ));
52
+ reg_t sign_x = _mm512_and_si512 (x, _mm512_set1_epi16 (static_cast < short >( 0x8000 ) ));
53
+ reg_t sign_y = _mm512_and_si512 (y, _mm512_set1_epi16 (static_cast < short >( 0x8000 ) ));
54
+ reg_t exp_x = _mm512_and_si512 (x, _mm512_set1_epi16 (static_cast < short >( 0x7c00 ) ));
55
+ reg_t exp_y = _mm512_and_si512 (y, _mm512_set1_epi16 (static_cast < short >( 0x7c00 ) ));
56
56
reg_t mant_x = _mm512_and_si512 (x, _mm512_set1_epi16 (0x3ff ));
57
57
reg_t mant_y = _mm512_and_si512 (y, _mm512_set1_epi16 (0x3ff ));
58
58
@@ -62,7 +62,7 @@ struct zmm_vector<float16> {
62
62
__mmask32 neg = _mm512_mask_cmpeq_epu16_mask (
63
63
sign_eq,
64
64
sign_x,
65
- _mm512_set1_epi16 (0x8000 )); // both numbers are -ve
65
+ _mm512_set1_epi16 (static_cast < short >( 0x8000 ) )); // both numbers are -ve
66
66
67
67
// compare exponents only if signs are equal:
68
68
mask_ge = mask_ge
@@ -136,7 +136,7 @@ struct zmm_vector<float16> {
136
136
// Converts a single-precision float to half precision and returns the
// 16-bit encoding taken from the lowest lane of the conversion result.
static type_t float_to_uint16 (float val)
{
    // Place val in the lowest lane; _mm_load_ss zeroes the remaining lanes.
    __m128 xmm = _mm_load_ss (&val);
    // Round to nearest, spelled by name instead of a bare 0.
    // _MM_FROUND_NO_EXC is deliberately not passed here.
    __m128i xmm2 = _mm_cvtps_ph (xmm, _MM_FROUND_TO_NEAREST_INT);
    // _mm_extract_epi16 yields an int; narrow explicitly to the return type.
    return static_cast<type_t> (_mm_extract_epi16 (xmm2, 0));
}
142
142
static type_t reducemax (reg_t v)
0 commit comments