Skip to content

Commit c815611

Browse files
committed
Core (Memory): Copy 64 bytes per main loop iteration in SSE2 routines for mem_set16() and mem_set32().
1 parent dd76f9c commit c815611

File tree

1 file changed

+6
-14
lines changed

1 file changed

+6
-14
lines changed

libvisual/libvisual/lv_mem.c

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -141,17 +141,13 @@ void *mem_set16_x86_sse2 (void *dest, int c, size_t n)
141141

142142
__m128i *m128i_ptr = (__m128i *) dest;
143143

144-
while (n >= 64) {
144+
while (n >= 32) {
145145
_mm_storeu_si128 (m128i_ptr, copy_4x);
146146
_mm_storeu_si128 (m128i_ptr + 1, copy_4x);
147147
_mm_storeu_si128 (m128i_ptr + 2, copy_4x);
148148
_mm_storeu_si128 (m128i_ptr + 3, copy_4x);
149-
_mm_storeu_si128 (m128i_ptr + 4, copy_4x);
150-
_mm_storeu_si128 (m128i_ptr + 5, copy_4x);
151-
_mm_storeu_si128 (m128i_ptr + 6, copy_4x);
152-
_mm_storeu_si128 (m128i_ptr + 7, copy_4x);
153-
n -= 64;
154-
m128i_ptr += 8;
149+
n -= 32;
150+
m128i_ptr += 4;
155151
}
156152

157153
uint16_t *uint16_ptr = (uint16_t *) m128i_ptr;
@@ -198,17 +194,13 @@ static void *mem_set32_x86_sse2 (void *dest, int c, size_t n)
198194
const __m128i copy_4x = _mm_set_epi32 (copy, copy, copy, copy);
199195

200196
__m128i *m128i_ptr = (__m128i *) dest;
201-
while (n >= 32) {
197+
while (n >= 16) {
202198
_mm_storeu_si128 (m128i_ptr, copy_4x);
203199
_mm_storeu_si128 (m128i_ptr + 1, copy_4x);
204200
_mm_storeu_si128 (m128i_ptr + 2, copy_4x);
205201
_mm_storeu_si128 (m128i_ptr + 3, copy_4x);
206-
_mm_storeu_si128 (m128i_ptr + 4, copy_4x);
207-
_mm_storeu_si128 (m128i_ptr + 5, copy_4x);
208-
_mm_storeu_si128 (m128i_ptr + 6, copy_4x);
209-
_mm_storeu_si128 (m128i_ptr + 7, copy_4x);
210-
n -= 32;
211-
m128i_ptr += 8;
202+
n -= 16;
203+
m128i_ptr += 4;
212204
}
213205

214206
uint32_t *uint32_ptr = (uint32_t *) m128i_ptr;

0 commit comments

Comments
 (0)