@@ -4051,7 +4051,7 @@ OIIO_FORCEINLINE void vint4::load_mask (const vbool_t& mask, const value_t *valu
4051
4051
4052
4052
OIIO_FORCEINLINE void vint4::store_mask (int mask, value_t *values) const {
4053
4053
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4054
- _mm_mask_storeu_epi32 (__mmask8 (mask), ( const simd_t *)values );
4054
+ _mm_mask_storeu_epi32 (values, __mmask8 (mask), m_simd );
4055
4055
#elif OIIO_SIMD_AVX >= 2
4056
4056
_mm_maskstore_epi32 (values, _mm_castps_si128 (vbool_t::from_bitmask (mask)), m_simd);
4057
4057
#else
@@ -4062,7 +4062,7 @@ OIIO_FORCEINLINE void vint4::store_mask (int mask, value_t *values) const {
4062
4062
4063
4063
OIIO_FORCEINLINE void vint4::store_mask (const vbool_t & mask, value_t *values) const {
4064
4064
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4065
- _mm_mask_storeu_epi32 (__mmask8 (mask), ( const simd_t *)values );
4065
+ _mm_mask_storeu_epi32 (values, mask. bitmask (), m_simd );
4066
4066
#elif OIIO_SIMD_AVX >= 2
4067
4067
_mm_maskstore_epi32 (values, _mm_castps_si128 (mask), m_simd);
4068
4068
#else
@@ -4110,7 +4110,7 @@ vint4::scatter_mask (const bool_t& mask, value_t *baseptr,
4110
4110
const vint_t & vindex) const
4111
4111
{
4112
4112
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4113
- _mm_mask_i32scatter_epi32 (baseptr, mask, vindex, m_simd, scale);
4113
+ _mm_mask_i32scatter_epi32 (baseptr, mask. bitmask () , vindex, m_simd, scale);
4114
4114
#else
4115
4115
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
4116
4116
#endif
@@ -4820,7 +4820,7 @@ OIIO_FORCEINLINE void vint8::load_mask (const vbool8& mask, const int *values) {
4820
4820
4821
4821
OIIO_FORCEINLINE void vint8::store_mask (int mask, int *values) const {
4822
4822
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4823
- _mm256_mask_storeu_epi32 (__mmask8 (mask), ( const simd_t *)values );
4823
+ _mm256_mask_storeu_epi32 (values, __mmask8 (mask), m_simd );
4824
4824
#elif OIIO_SIMD_AVX >= 2
4825
4825
_mm256_maskstore_epi32 (values, _mm256_castps_si256 (vbool8::from_bitmask (mask)), m_simd);
4826
4826
#else
@@ -4831,7 +4831,7 @@ OIIO_FORCEINLINE void vint8::store_mask (int mask, int *values) const {
4831
4831
4832
4832
OIIO_FORCEINLINE void vint8::store_mask (const vbool8& mask, int *values) const {
4833
4833
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4834
- _mm256_mask_storeu_epi32 (__mmask8 (mask), ( const simd_t *)values );
4834
+ _mm256_mask_storeu_epi32 (values, __mmask8 (mask. bitmask ()), m_simd );
4835
4835
#elif OIIO_SIMD_AVX >= 2
4836
4836
_mm256_maskstore_epi32 (values, _mm256_castps_si256 (mask), m_simd);
4837
4837
#else
@@ -4879,7 +4879,7 @@ vint8::scatter_mask (const bool_t& mask, value_t *baseptr,
4879
4879
const vint_t & vindex) const
4880
4880
{
4881
4881
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
4882
- _mm256_mask_i32scatter_epi32 (baseptr, mask, vindex, m_simd, scale);
4882
+ _mm256_mask_i32scatter_epi32 (baseptr, mask. bitmask () , vindex, m_simd, scale);
4883
4883
#else
4884
4884
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
4885
4885
#endif
@@ -6492,7 +6492,7 @@ OIIO_FORCEINLINE void vfloat4::load_mask (const vbool_t& mask, const float *valu
6492
6492
6493
6493
OIIO_FORCEINLINE void vfloat4::store_mask (int mask, float *values) const {
6494
6494
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
6495
- m_simd = _mm_mask_storeu_ps (__mmask8 (mask), ( const simd_t *)values );
6495
+ _mm_mask_storeu_ps (values, __mmask8 (mask), m_simd );
6496
6496
#elif OIIO_SIMD_AVX
6497
6497
// Concern: is this really faster?
6498
6498
_mm_maskstore_ps (values, _mm_castps_si128 (vbool_t::from_bitmask (mask)), m_simd);
@@ -6504,7 +6504,7 @@ OIIO_FORCEINLINE void vfloat4::store_mask (int mask, float *values) const {
6504
6504
6505
6505
OIIO_FORCEINLINE void vfloat4::store_mask (const vbool_t & mask, float *values) const {
6506
6506
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
6507
- m_simd = _mm_mask_storeu_ps (__mmask8 (mask.bitmask ()), ( const simd_t *)values );
6507
+ _mm_mask_storeu_ps (values, __mmask8 (mask.bitmask ()), m_simd );
6508
6508
#elif OIIO_SIMD_AVX
6509
6509
// Concern: is this really faster?
6510
6510
_mm_maskstore_ps (values, _mm_castps_si128 (mask.simd ()), m_simd);
@@ -6553,7 +6553,7 @@ vfloat4::scatter_mask (const bool_t& mask, value_t *baseptr,
6553
6553
const vint_t & vindex) const
6554
6554
{
6555
6555
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
6556
- _mm_mask_i32scatter_ps (baseptr, mask, vindex, m_simd, scale);
6556
+ _mm_mask_i32scatter_ps (baseptr, mask. bitmask () , vindex, m_simd, scale);
6557
6557
#else
6558
6558
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
6559
6559
#endif
@@ -8162,7 +8162,7 @@ OIIO_FORCEINLINE void vfloat8::load_mask (const vbool8& mask, const float *value
8162
8162
8163
8163
OIIO_FORCEINLINE void vfloat8::store_mask (int mask, float *values) const {
8164
8164
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
8165
- m_simd = _mm256_mask_storeu_ps (__mmask8 (mask), ( const simd_t *)values );
8165
+ _mm256_mask_storeu_ps (values, __mmask8 (mask), m_simd );
8166
8166
#elif OIIO_SIMD_AVX
8167
8167
// Concern: is this really faster?
8168
8168
_mm256_maskstore_ps (values, _mm256_castps_si256 (vbool8::from_bitmask (mask)), m_simd);
@@ -8174,7 +8174,7 @@ OIIO_FORCEINLINE void vfloat8::store_mask (int mask, float *values) const {
8174
8174
8175
8175
OIIO_FORCEINLINE void vfloat8::store_mask (const vbool8& mask, float *values) const {
8176
8176
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
8177
- m_simd = _mm256_mask_storeu_ps (__mmask8 (mask.bitmask ()), ( const simd_t *)values );
8177
+ _mm256_mask_storeu_ps (values, __mmask8 (mask.bitmask ()), m_simd );
8178
8178
#elif OIIO_SIMD_AVX
8179
8179
// Concern: is this really faster?
8180
8180
_mm256_maskstore_ps (values, _mm256_castps_si256 (mask.simd ()), m_simd);
@@ -8223,7 +8223,7 @@ vfloat8::scatter_mask (const bool_t& mask, value_t *baseptr,
8223
8223
const vint_t & vindex) const
8224
8224
{
8225
8225
#if OIIO_SIMD_AVX >= 512 && OIIO_AVX512VL_ENABLED
8226
- _mm256_mask_i32scatter_ps (baseptr, mask, vindex, m_simd, scale);
8226
+ _mm256_mask_i32scatter_ps (baseptr, mask. bitmask () , vindex, m_simd, scale);
8227
8227
#else
8228
8228
SIMD_DO (if (mask[i]) *(value_t *)((char *)baseptr + vindex[i]*scale) = m_val[i]);
8229
8229
#endif
0 commit comments