@@ -141,17 +141,13 @@ void *mem_set16_x86_sse2 (void *dest, int c, size_t n)
141141
142142 __m128i * m128i_ptr = (__m128i * ) dest ;
143143
144- while (n >= 64 ) {
144+ while (n >= 32 ) {
145145 _mm_storeu_si128 (m128i_ptr , copy_4x );
146146 _mm_storeu_si128 (m128i_ptr + 1 , copy_4x );
147147 _mm_storeu_si128 (m128i_ptr + 2 , copy_4x );
148148 _mm_storeu_si128 (m128i_ptr + 3 , copy_4x );
149- _mm_storeu_si128 (m128i_ptr + 4 , copy_4x );
150- _mm_storeu_si128 (m128i_ptr + 5 , copy_4x );
151- _mm_storeu_si128 (m128i_ptr + 6 , copy_4x );
152- _mm_storeu_si128 (m128i_ptr + 7 , copy_4x );
153- n -= 64 ;
154- m128i_ptr += 8 ;
149+ n -= 32 ;
150+ m128i_ptr += 4 ;
155151 }
156152
157153 uint16_t * uint16_ptr = (uint16_t * ) m128i_ptr ;
@@ -198,17 +194,13 @@ static void *mem_set32_x86_sse2 (void *dest, int c, size_t n)
198194 const __m128i copy_4x = _mm_set_epi32 (copy , copy , copy , copy );
199195
200196 __m128i * m128i_ptr = (__m128i * ) dest ;
201- while (n >= 32 ) {
197+ while (n >= 16 ) {
202198 _mm_storeu_si128 (m128i_ptr , copy_4x );
203199 _mm_storeu_si128 (m128i_ptr + 1 , copy_4x );
204200 _mm_storeu_si128 (m128i_ptr + 2 , copy_4x );
205201 _mm_storeu_si128 (m128i_ptr + 3 , copy_4x );
206- _mm_storeu_si128 (m128i_ptr + 4 , copy_4x );
207- _mm_storeu_si128 (m128i_ptr + 5 , copy_4x );
208- _mm_storeu_si128 (m128i_ptr + 6 , copy_4x );
209- _mm_storeu_si128 (m128i_ptr + 7 , copy_4x );
210- n -= 32 ;
211- m128i_ptr += 8 ;
202+ n -= 16 ;
203+ m128i_ptr += 4 ;
212204 }
213205
214206 uint32_t * uint32_ptr = (uint32_t * ) m128i_ptr ;
0 commit comments