@@ -96,87 +96,87 @@ namespace xsimd
96 96 * batch cast functions implementation *
97 97 *****************************************/
98 98
99- XSIMD_BATCH_CAST_IMPLICIT (int8_t , uint8_t , 64 );
100- XSIMD_BATCH_CAST_IMPLICIT (uint8_t , int8_t , 64 );
101- XSIMD_BATCH_CAST_IMPLICIT (int16_t , uint16_t , 32 );
102- XSIMD_BATCH_CAST_INTRINSIC (int16_t , int32_t , 16 , _mm512_cvtepi16_epi32);
103- XSIMD_BATCH_CAST_INTRINSIC (int16_t , uint32_t , 16 , _mm512_cvtepi16_epi32);
104- XSIMD_BATCH_CAST_INTRINSIC (int16_t , int64_t , 8 , _mm512_cvtepi16_epi64);
105- XSIMD_BATCH_CAST_INTRINSIC (int16_t , uint64_t , 8 , _mm512_cvtepi16_epi64);
106- XSIMD_BATCH_CAST_INTRINSIC2 (int16_t , float , 16 , _mm512_cvtepi16_epi32, _mm512_cvtepi32_ps);
107- XSIMD_BATCH_CAST_IMPLICIT (uint16_t , int16_t , 32 );
108- XSIMD_BATCH_CAST_INTRINSIC (uint16_t , int32_t , 16 , _mm512_cvtepu16_epi32);
109- XSIMD_BATCH_CAST_INTRINSIC (uint16_t , uint32_t , 16 , _mm512_cvtepu16_epi32);
110- XSIMD_BATCH_CAST_INTRINSIC (uint16_t , int64_t , 8 , _mm512_cvtepu16_epi64);
111- XSIMD_BATCH_CAST_INTRINSIC (uint16_t , uint64_t , 8 , _mm512_cvtepu16_epi64);
112- XSIMD_BATCH_CAST_INTRINSIC2 (uint16_t , float , 16 , _mm512_cvtepu16_epi32, _mm512_cvtepi32_ps);
113- XSIMD_BATCH_CAST_INTRINSIC (int32_t , int8_t , 16 , _mm512_cvtepi32_epi8);
114- XSIMD_BATCH_CAST_INTRINSIC (int32_t , uint8_t , 16 , _mm512_cvtepi32_epi8);
115- XSIMD_BATCH_CAST_INTRINSIC (int32_t , int16_t , 16 , _mm512_cvtepi32_epi16);
116- XSIMD_BATCH_CAST_INTRINSIC (int32_t , uint16_t , 16 , _mm512_cvtepi32_epi16);
117- XSIMD_BATCH_CAST_IMPLICIT (int32_t , uint32_t , 16 );
118- XSIMD_BATCH_CAST_INTRINSIC (int32_t , int64_t , 8 , _mm512_cvtepi32_epi64);
119- XSIMD_BATCH_CAST_INTRINSIC (int32_t , uint64_t , 8 , _mm512_cvtepi32_epi64);
120- XSIMD_BATCH_CAST_INTRINSIC (int32_t , float , 16 , _mm512_cvtepi32_ps);
121- XSIMD_BATCH_CAST_INTRINSIC (int32_t , double , 8 , _mm512_cvtepi32_pd);
122- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , int8_t , 16 , _mm512_cvtepi32_epi8);
123- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , uint8_t , 16 , _mm512_cvtepi32_epi8);
124- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , int16_t , 16 , _mm512_cvtepi32_epi16);
125- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , uint16_t , 16 , _mm512_cvtepi32_epi16);
126- XSIMD_BATCH_CAST_IMPLICIT (uint32_t , int32_t , 16 );
127- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , int64_t , 8 , _mm512_cvtepu32_epi64);
128- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , uint64_t , 8 , _mm512_cvtepu32_epi64);
129- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , float , 16 , _mm512_cvtepu32_ps);
130- XSIMD_BATCH_CAST_INTRINSIC (uint32_t , double , 8 , _mm512_cvtepu32_pd);
131- XSIMD_BATCH_CAST_INTRINSIC (int64_t , int16_t , 8 , _mm512_cvtepi64_epi16);
132- XSIMD_BATCH_CAST_INTRINSIC (int64_t , uint16_t , 8 , _mm512_cvtepi64_epi16);
133- XSIMD_BATCH_CAST_INTRINSIC (int64_t , int32_t , 8 , _mm512_cvtepi64_epi32);
134- XSIMD_BATCH_CAST_INTRINSIC (int64_t , uint32_t , 8 , _mm512_cvtepi64_epi32);
135- XSIMD_BATCH_CAST_IMPLICIT (int64_t , uint64_t , 8 );
136- XSIMD_BATCH_CAST_INTRINSIC (uint64_t , int16_t , 8 , _mm512_cvtepi64_epi16);
137- XSIMD_BATCH_CAST_INTRINSIC (uint64_t , uint16_t , 8 , _mm512_cvtepi64_epi16);
138- XSIMD_BATCH_CAST_INTRINSIC (uint64_t , int32_t , 8 , _mm512_cvtepi64_epi32);
139- XSIMD_BATCH_CAST_INTRINSIC (uint64_t , uint32_t , 8 , _mm512_cvtepi64_epi32);
140- XSIMD_BATCH_CAST_IMPLICIT (uint64_t , int64_t , 8 );
141- XSIMD_BATCH_CAST_INTRINSIC2 (float , int8_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi8);
142- XSIMD_BATCH_CAST_INTRINSIC2 (float , uint8_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi8);
143- XSIMD_BATCH_CAST_INTRINSIC2 (float , int16_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi16);
144- XSIMD_BATCH_CAST_INTRINSIC2 (float , uint16_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi16);
145- XSIMD_BATCH_CAST_INTRINSIC (float , int32_t , 16 , _mm512_cvttps_epi32);
146- XSIMD_BATCH_CAST_INTRINSIC (float , uint32_t , 16 , _mm512_cvttps_epu32);
147- XSIMD_BATCH_CAST_INTRINSIC (float , double , 8 , _mm512_cvtps_pd);
148- XSIMD_BATCH_CAST_INTRINSIC (double , int32_t , 8 , _mm512_cvttpd_epi32);
149- XSIMD_BATCH_CAST_INTRINSIC (double , uint32_t , 8 , _mm512_cvttpd_epu32);
150- XSIMD_BATCH_CAST_INTRINSIC (double , float , 8 , _mm512_cvtpd_ps);
99+ XSIMD_BATCH_CAST_IMPLICIT (int8_t , uint8_t , 64 )
100+ XSIMD_BATCH_CAST_IMPLICIT (uint8_t , int8_t , 64 )
101+ XSIMD_BATCH_CAST_IMPLICIT (int16_t , uint16_t , 32 )
102+ XSIMD_BATCH_CAST_INTRINSIC (int16_t , int32_t , 16 , _mm512_cvtepi16_epi32)
103+ XSIMD_BATCH_CAST_INTRINSIC (int16_t , uint32_t , 16 , _mm512_cvtepi16_epi32)
104+ XSIMD_BATCH_CAST_INTRINSIC (int16_t , int64_t , 8 , _mm512_cvtepi16_epi64)
105+ XSIMD_BATCH_CAST_INTRINSIC (int16_t , uint64_t , 8 , _mm512_cvtepi16_epi64)
106+ XSIMD_BATCH_CAST_INTRINSIC2 (int16_t , float , 16 , _mm512_cvtepi16_epi32, _mm512_cvtepi32_ps)
107+ XSIMD_BATCH_CAST_IMPLICIT (uint16_t , int16_t , 32 )
108+ XSIMD_BATCH_CAST_INTRINSIC (uint16_t , int32_t , 16 , _mm512_cvtepu16_epi32)
109+ XSIMD_BATCH_CAST_INTRINSIC (uint16_t , uint32_t , 16 , _mm512_cvtepu16_epi32)
110+ XSIMD_BATCH_CAST_INTRINSIC (uint16_t , int64_t , 8 , _mm512_cvtepu16_epi64)
111+ XSIMD_BATCH_CAST_INTRINSIC (uint16_t , uint64_t , 8 , _mm512_cvtepu16_epi64)
112+ XSIMD_BATCH_CAST_INTRINSIC2 (uint16_t , float , 16 , _mm512_cvtepu16_epi32, _mm512_cvtepi32_ps)
113+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , int8_t , 16 , _mm512_cvtepi32_epi8)
114+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , uint8_t , 16 , _mm512_cvtepi32_epi8)
115+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , int16_t , 16 , _mm512_cvtepi32_epi16)
116+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , uint16_t , 16 , _mm512_cvtepi32_epi16)
117+ XSIMD_BATCH_CAST_IMPLICIT (int32_t , uint32_t , 16 )
118+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , int64_t , 8 , _mm512_cvtepi32_epi64)
119+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , uint64_t , 8 , _mm512_cvtepi32_epi64)
120+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , float , 16 , _mm512_cvtepi32_ps)
121+ XSIMD_BATCH_CAST_INTRINSIC (int32_t , double , 8 , _mm512_cvtepi32_pd)
122+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , int8_t , 16 , _mm512_cvtepi32_epi8)
123+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , uint8_t , 16 , _mm512_cvtepi32_epi8)
124+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , int16_t , 16 , _mm512_cvtepi32_epi16)
125+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , uint16_t , 16 , _mm512_cvtepi32_epi16)
126+ XSIMD_BATCH_CAST_IMPLICIT (uint32_t , int32_t , 16 )
127+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , int64_t , 8 , _mm512_cvtepu32_epi64)
128+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , uint64_t , 8 , _mm512_cvtepu32_epi64)
129+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , float , 16 , _mm512_cvtepu32_ps)
130+ XSIMD_BATCH_CAST_INTRINSIC (uint32_t , double , 8 , _mm512_cvtepu32_pd)
131+ XSIMD_BATCH_CAST_INTRINSIC (int64_t , int16_t , 8 , _mm512_cvtepi64_epi16)
132+ XSIMD_BATCH_CAST_INTRINSIC (int64_t , uint16_t , 8 , _mm512_cvtepi64_epi16)
133+ XSIMD_BATCH_CAST_INTRINSIC (int64_t , int32_t , 8 , _mm512_cvtepi64_epi32)
134+ XSIMD_BATCH_CAST_INTRINSIC (int64_t , uint32_t , 8 , _mm512_cvtepi64_epi32)
135+ XSIMD_BATCH_CAST_IMPLICIT (int64_t , uint64_t , 8 )
136+ XSIMD_BATCH_CAST_INTRINSIC (uint64_t , int16_t , 8 , _mm512_cvtepi64_epi16)
137+ XSIMD_BATCH_CAST_INTRINSIC (uint64_t , uint16_t , 8 , _mm512_cvtepi64_epi16)
138+ XSIMD_BATCH_CAST_INTRINSIC (uint64_t , int32_t , 8 , _mm512_cvtepi64_epi32)
139+ XSIMD_BATCH_CAST_INTRINSIC (uint64_t , uint32_t , 8 , _mm512_cvtepi64_epi32)
140+ XSIMD_BATCH_CAST_IMPLICIT (uint64_t , int64_t , 8 )
141+ XSIMD_BATCH_CAST_INTRINSIC2 (float , int8_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi8)
142+ XSIMD_BATCH_CAST_INTRINSIC2 (float , uint8_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi8)
143+ XSIMD_BATCH_CAST_INTRINSIC2 (float , int16_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi16)
144+ XSIMD_BATCH_CAST_INTRINSIC2 (float , uint16_t , 16 , _mm512_cvttps_epi32, _mm512_cvtepi32_epi16)
145+ XSIMD_BATCH_CAST_INTRINSIC (float , int32_t , 16 , _mm512_cvttps_epi32)
146+ XSIMD_BATCH_CAST_INTRINSIC (float , uint32_t , 16 , _mm512_cvttps_epu32)
147+ XSIMD_BATCH_CAST_INTRINSIC (float , double , 8 , _mm512_cvtps_pd)
148+ XSIMD_BATCH_CAST_INTRINSIC (double , int32_t , 8 , _mm512_cvttpd_epi32)
149+ XSIMD_BATCH_CAST_INTRINSIC (double , uint32_t , 8 , _mm512_cvttpd_epu32)
150+ XSIMD_BATCH_CAST_INTRINSIC (double , float , 8 , _mm512_cvtpd_ps)
151 151 #if defined(XSIMD_AVX512BW_AVAILABLE)
152- XSIMD_BATCH_CAST_INTRINSIC (int8_t , int16_t , 32 , _mm512_cvtepi8_epi16);
153- XSIMD_BATCH_CAST_INTRINSIC (int8_t , uint16_t , 32 , _mm512_cvtepi8_epi16);
154- XSIMD_BATCH_CAST_INTRINSIC (int8_t , int32_t , 16 , _mm512_cvtepi8_epi32);
155- XSIMD_BATCH_CAST_INTRINSIC (int8_t , uint32_t , 16 , _mm512_cvtepi8_epi32);
156- XSIMD_BATCH_CAST_INTRINSIC2 (int8_t , float , 16 , _mm512_cvtepi8_epi32, _mm512_cvtepi32_ps);
157- XSIMD_BATCH_CAST_INTRINSIC (uint8_t , int16_t , 32 , _mm512_cvtepu8_epi16);
158- XSIMD_BATCH_CAST_INTRINSIC (uint8_t , uint16_t , 32 , _mm512_cvtepu8_epi16);
159- XSIMD_BATCH_CAST_INTRINSIC (uint8_t , int32_t , 16 , _mm512_cvtepu8_epi32);
160- XSIMD_BATCH_CAST_INTRINSIC (uint8_t , uint32_t , 16 , _mm512_cvtepu8_epi32);
161- XSIMD_BATCH_CAST_INTRINSIC2 (uint8_t , float , 16 , _mm512_cvtepu8_epi32, _mm512_cvtepi32_ps);
162- XSIMD_BATCH_CAST_INTRINSIC (int16_t , int8_t , 32 , _mm512_cvtepi16_epi8);
163- XSIMD_BATCH_CAST_INTRINSIC (int16_t , uint8_t , 32 , _mm512_cvtepi16_epi8);
164- XSIMD_BATCH_CAST_INTRINSIC (uint16_t , int8_t , 32 , _mm512_cvtepi16_epi8);
165- XSIMD_BATCH_CAST_INTRINSIC (uint16_t , uint8_t , 32 , _mm512_cvtepi16_epi8);
152+ XSIMD_BATCH_CAST_INTRINSIC (int8_t , int16_t , 32 , _mm512_cvtepi8_epi16)
153+ XSIMD_BATCH_CAST_INTRINSIC(int8_t , uint16_t , 32 , _mm512_cvtepi8_epi16)
154+ XSIMD_BATCH_CAST_INTRINSIC(int8_t , int32_t , 16 , _mm512_cvtepi8_epi32)
155+ XSIMD_BATCH_CAST_INTRINSIC(int8_t , uint32_t , 16 , _mm512_cvtepi8_epi32)
156+ XSIMD_BATCH_CAST_INTRINSIC2(int8_t , float , 16 , _mm512_cvtepi8_epi32, _mm512_cvtepi32_ps)
157+ XSIMD_BATCH_CAST_INTRINSIC(uint8_t , int16_t , 32 , _mm512_cvtepu8_epi16)
158+ XSIMD_BATCH_CAST_INTRINSIC(uint8_t , uint16_t , 32 , _mm512_cvtepu8_epi16)
159+ XSIMD_BATCH_CAST_INTRINSIC(uint8_t , int32_t , 16 , _mm512_cvtepu8_epi32)
160+ XSIMD_BATCH_CAST_INTRINSIC(uint8_t , uint32_t , 16 , _mm512_cvtepu8_epi32)
161+ XSIMD_BATCH_CAST_INTRINSIC2(uint8_t , float , 16 , _mm512_cvtepu8_epi32, _mm512_cvtepi32_ps)
162+ XSIMD_BATCH_CAST_INTRINSIC(int16_t , int8_t , 32 , _mm512_cvtepi16_epi8)
163+ XSIMD_BATCH_CAST_INTRINSIC(int16_t , uint8_t , 32 , _mm512_cvtepi16_epi8)
164+ XSIMD_BATCH_CAST_INTRINSIC(uint16_t , int8_t , 32 , _mm512_cvtepi16_epi8)
165+ XSIMD_BATCH_CAST_INTRINSIC(uint16_t , uint8_t , 32 , _mm512_cvtepi16_epi8)
166 166 #endif
167 167 #if defined(XSIMD_AVX512DQ_AVAILABLE)
168- XSIMD_BATCH_CAST_INTRINSIC2 (int16_t , double , 8 , _mm512_cvtepi16_epi64, _mm512_cvtepi64_pd);
169- XSIMD_BATCH_CAST_INTRINSIC2 (uint16_t , double , 8 , _mm512_cvtepu16_epi64, _mm512_cvtepi64_pd);
170- XSIMD_BATCH_CAST_INTRINSIC (int64_t , float , 8 , _mm512_cvtepi64_ps);
171- XSIMD_BATCH_CAST_INTRINSIC (int64_t , double , 8 , _mm512_cvtepi64_pd);
172- XSIMD_BATCH_CAST_INTRINSIC (uint64_t , float , 8 , _mm512_cvtepu64_ps);
173- XSIMD_BATCH_CAST_INTRINSIC (uint64_t , double , 8 , _mm512_cvtepu64_pd);
174- XSIMD_BATCH_CAST_INTRINSIC (float , int64_t , 8 , _mm512_cvttps_epi64);
175- XSIMD_BATCH_CAST_INTRINSIC (float , uint64_t , 8 , _mm512_cvttps_epu64);
176- XSIMD_BATCH_CAST_INTRINSIC2 (double , int16_t , 8 , _mm512_cvttpd_epi64, _mm512_cvtepi64_epi16);
177- XSIMD_BATCH_CAST_INTRINSIC2 (double , uint16_t , 8 , _mm512_cvttpd_epi64, _mm512_cvtepi64_epi16);
178- XSIMD_BATCH_CAST_INTRINSIC (double , int64_t , 8 , _mm512_cvttpd_epi64);
179- XSIMD_BATCH_CAST_INTRINSIC (double , uint64_t , 8 , _mm512_cvttpd_epu64);
168+ XSIMD_BATCH_CAST_INTRINSIC2 (int16_t , double , 8 , _mm512_cvtepi16_epi64, _mm512_cvtepi64_pd)
169+ XSIMD_BATCH_CAST_INTRINSIC2(uint16_t , double , 8 , _mm512_cvtepu16_epi64, _mm512_cvtepi64_pd)
170+ XSIMD_BATCH_CAST_INTRINSIC(int64_t , float , 8 , _mm512_cvtepi64_ps)
171+ XSIMD_BATCH_CAST_INTRINSIC(int64_t , double , 8 , _mm512_cvtepi64_pd)
172+ XSIMD_BATCH_CAST_INTRINSIC(uint64_t , float , 8 , _mm512_cvtepu64_ps)
173+ XSIMD_BATCH_CAST_INTRINSIC(uint64_t , double , 8 , _mm512_cvtepu64_pd)
174+ XSIMD_BATCH_CAST_INTRINSIC(float , int64_t , 8 , _mm512_cvttps_epi64)
175+ XSIMD_BATCH_CAST_INTRINSIC(float , uint64_t , 8 , _mm512_cvttps_epu64)
176+ XSIMD_BATCH_CAST_INTRINSIC2(double , int16_t , 8 , _mm512_cvttpd_epi64, _mm512_cvtepi64_epi16)
177+ XSIMD_BATCH_CAST_INTRINSIC2(double , uint16_t , 8 , _mm512_cvttpd_epi64, _mm512_cvtepi64_epi16)
178+ XSIMD_BATCH_CAST_INTRINSIC(double , int64_t , 8 , _mm512_cvttpd_epi64)
179+ XSIMD_BATCH_CAST_INTRINSIC(double , uint64_t , 8 , _mm512_cvttpd_epu64)
180 180 #endif
181 181
182 182 /* *************************
0 commit comments