Skip to content

Commit a346101

Browse files
authored
simd.h: fix a bunch of broken code for AVX512VL (#1781)
1 parent 5ba6751 commit a346101

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

src/include/OpenImageIO/simd.h

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4051,7 +4051,7 @@ OIIO_FORCEINLINE void vint4::load_mask (const vbool_t& mask, const value_t *valu
40514051

40524052
OIIO_FORCEINLINE void vint4::store_mask (int mask, value_t *values) const {
40534053
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4054-
_mm_mask_storeu_epi32 (__mmask8(mask), (const simd_t *)values);
4054+
_mm_mask_storeu_epi32 (values, __mmask8(mask), m_simd);
40554055
#elif OIIO_SIMD_AVX >= 2
40564056
_mm_maskstore_epi32 (values, _mm_castps_si128(vbool_t::from_bitmask(mask)), m_simd);
40574057
#else
@@ -4062,7 +4062,7 @@ OIIO_FORCEINLINE void vint4::store_mask (int mask, value_t *values) const {
40624062

40634063
OIIO_FORCEINLINE void vint4::store_mask (const vbool_t& mask, value_t *values) const {
40644064
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4065-
_mm_mask_storeu_epi32 (__mmask8(mask), (const simd_t *)values);
4065+
_mm_mask_storeu_epi32 (values, mask.bitmask(), m_simd);
40664066
#elif OIIO_SIMD_AVX >= 2
40674067
_mm_maskstore_epi32 (values, _mm_castps_si128(mask), m_simd);
40684068
#else
@@ -4110,7 +4110,7 @@ vint4::scatter_mask (const bool_t& mask, value_t *baseptr,
41104110
const vint_t& vindex) const
41114111
{
41124112
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4113-
_mm_mask_i32scatter_epi32 (baseptr, mask, vindex, m_simd, scale);
4113+
_mm_mask_i32scatter_epi32 (baseptr, mask.bitmask(), vindex, m_simd, scale);
41144114
#else
41154115
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
41164116
#endif
@@ -4820,7 +4820,7 @@ OIIO_FORCEINLINE void vint8::load_mask (const vbool8& mask, const int *values) {
48204820

48214821
OIIO_FORCEINLINE void vint8::store_mask (int mask, int *values) const {
48224822
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4823-
_mm256_mask_storeu_epi32 (__mmask8(mask), (const simd_t *)values);
4823+
_mm256_mask_storeu_epi32 (values, __mmask8(mask), m_simd);
48244824
#elif OIIO_SIMD_AVX >= 2
48254825
_mm256_maskstore_epi32 (values, _mm256_castps_si256(vbool8::from_bitmask(mask)), m_simd);
48264826
#else
@@ -4831,7 +4831,7 @@ OIIO_FORCEINLINE void vint8::store_mask (int mask, int *values) const {
48314831

48324832
OIIO_FORCEINLINE void vint8::store_mask (const vbool8& mask, int *values) const {
48334833
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4834-
_mm256_mask_storeu_epi32 (__mmask8(mask), (const simd_t *)values);
4834+
_mm256_mask_storeu_epi32 (values, __mmask8(mask.bitmask()), m_simd);
48354835
#elif OIIO_SIMD_AVX >= 2
48364836
_mm256_maskstore_epi32 (values, _mm256_castps_si256(mask), m_simd);
48374837
#else
@@ -4879,7 +4879,7 @@ vint8::scatter_mask (const bool_t& mask, value_t *baseptr,
48794879
const vint_t& vindex) const
48804880
{
48814881
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4882-
_mm256_mask_i32scatter_epi32 (baseptr, mask, vindex, m_simd, scale);
4882+
_mm256_mask_i32scatter_epi32 (baseptr, mask.bitmask(), vindex, m_simd, scale);
48834883
#else
48844884
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
48854885
#endif
@@ -6492,7 +6492,7 @@ OIIO_FORCEINLINE void vfloat4::load_mask (const vbool_t& mask, const float *valu
64926492

64936493
OIIO_FORCEINLINE void vfloat4::store_mask (int mask, float *values) const {
64946494
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
6495-
m_simd = _mm_mask_storeu_ps (__mmask8(mask), (const simd_t *)values);
6495+
_mm_mask_storeu_ps (values, __mmask8(mask), m_simd);
64966496
#elif OIIO_SIMD_AVX
64976497
// Concern: is this really faster?
64986498
_mm_maskstore_ps (values, _mm_castps_si128(vbool_t::from_bitmask(mask)), m_simd);
@@ -6504,7 +6504,7 @@ OIIO_FORCEINLINE void vfloat4::store_mask (int mask, float *values) const {
65046504

65056505
OIIO_FORCEINLINE void vfloat4::store_mask (const vbool_t& mask, float *values) const {
65066506
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
6507-
m_simd = _mm_mask_storeu_ps (__mmask8(mask.bitmask()), (const simd_t *)values);
6507+
_mm_mask_storeu_ps (values, __mmask8(mask.bitmask()), m_simd);
65086508
#elif OIIO_SIMD_AVX
65096509
// Concern: is this really faster?
65106510
_mm_maskstore_ps (values, _mm_castps_si128(mask.simd()), m_simd);
@@ -6553,7 +6553,7 @@ vfloat4::scatter_mask (const bool_t& mask, value_t *baseptr,
65536553
const vint_t& vindex) const
65546554
{
65556555
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
6556-
_mm_mask_i32scatter_ps (baseptr, mask, vindex, m_simd, scale);
6556+
_mm_mask_i32scatter_ps (baseptr, mask.bitmask(), vindex, m_simd, scale);
65576557
#else
65586558
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
65596559
#endif
@@ -8162,7 +8162,7 @@ OIIO_FORCEINLINE void vfloat8::load_mask (const vbool8& mask, const float *value
81628162

81638163
OIIO_FORCEINLINE void vfloat8::store_mask (int mask, float *values) const {
81648164
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
8165-
m_simd = _mm256_mask_storeu_ps (__mmask8(mask), (const simd_t *)values);
8165+
_mm256_mask_storeu_ps (values, __mmask8(mask), m_simd);
81668166
#elif OIIO_SIMD_AVX
81678167
// Concern: is this really faster?
81688168
_mm256_maskstore_ps (values, _mm256_castps_si256(vbool8::from_bitmask(mask)), m_simd);
@@ -8174,7 +8174,7 @@ OIIO_FORCEINLINE void vfloat8::store_mask (int mask, float *values) const {
81748174

81758175
OIIO_FORCEINLINE void vfloat8::store_mask (const vbool8& mask, float *values) const {
81768176
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
8177-
m_simd = _mm256_mask_storeu_ps (__mmask8(mask.bitmask()), (const simd_t *)values);
8177+
_mm256_mask_storeu_ps (values, __mmask8(mask.bitmask()), m_simd);
81788178
#elif OIIO_SIMD_AVX
81798179
// Concern: is this really faster?
81808180
_mm256_maskstore_ps (values, _mm256_castps_si256(mask.simd()), m_simd);
@@ -8223,7 +8223,7 @@ vfloat8::scatter_mask (const bool_t& mask, value_t *baseptr,
82238223
const vint_t& vindex) const
82248224
{
82258225
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
8226-
_mm256_mask_i32scatter_ps (baseptr, mask, vindex, m_simd, scale);
8226+
_mm256_mask_i32scatter_ps (baseptr, mask.bitmask(), vindex, m_simd, scale);
82278227
#else
82288228
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
82298229
#endif

0 commit comments

Comments
 (0)