Skip to content

Commit c7e9611

Browse files
ethercrow authored and Bodigrim committed
Use SSE2 in the x86_64 C version of decodeUtf8
1 parent 8853069 commit c7e9611

File tree

1 file changed

+37
-15
lines changed

1 file changed

+37
-15
lines changed

cbits/cbits.c

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@
99
#include <string.h>
1010
#include <stdint.h>
1111
#include <stdio.h>
12+
#if defined(__x86_64__)
13+
#include <emmintrin.h>
14+
#include <xmmintrin.h>
15+
#endif
16+
1217
#include "text_cbits.h"
1318

19+
1420
void _hs_text_memcpy(void *dest, size_t doff, const void *src, size_t soff,
1521
size_t n)
1622
{
@@ -157,24 +163,40 @@ _hs_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
157163
*/
158164

159165
if (state == UTF8_ACCEPT) {
166+
#if defined(__x86_64__)
167+
const __m128i zeros = _mm_set1_epi32(0);
168+
while (s < srcend - 8) {
169+
const uint64_t hopefully_eight_ascii_chars = *((uint64_t *) s);
170+
if ((hopefully_eight_ascii_chars & 0x8080808080808080LL) != 0LL)
171+
break;
172+
s += 8;
173+
174+
/* Load 8 bytes of ASCII data */
175+
const __m128i eight_ascii_chars = _mm_cvtsi64_si128(hopefully_eight_ascii_chars);
176+
/* Interleave with zeros */
177+
const __m128i eight_utf16_chars = _mm_unpacklo_epi8(eight_ascii_chars, zeros);
178+
/* Store the resulting 16 bytes (eight UTF-16 code units) into destination */
179+
_mm_storeu_si128((__m128i *)d, eight_utf16_chars);
180+
d += 8;
181+
}
182+
#else
160183
while (s < srcend - 4) {
161-
codepoint = *((uint32_t *) s);
162-
if ((codepoint & 0x80808080) != 0)
163-
break;
164-
s += 4;
165-
166-
/*
167-
* Tried 32-bit stores here, but the extra bit-twiddling
168-
* slowed the code down.
169-
*/
170-
171-
*d++ = (uint16_t) (codepoint & 0xff);
172-
*d++ = (uint16_t) ((codepoint >> 8) & 0xff);
173-
*d++ = (uint16_t) ((codepoint >> 16) & 0xff);
174-
*d++ = (uint16_t) ((codepoint >> 24) & 0xff);
184+
codepoint = *((uint32_t *) s);
185+
if ((codepoint & 0x80808080) != 0)
186+
break;
187+
s += 4;
188+
/*
189+
* Tried 32-bit stores here, but the extra bit-twiddling
190+
* slowed the code down.
191+
*/
192+
*d++ = (uint16_t) (codepoint & 0xff);
193+
*d++ = (uint16_t) ((codepoint >> 8) & 0xff);
194+
*d++ = (uint16_t) ((codepoint >> 16) & 0xff);
195+
*d++ = (uint16_t) ((codepoint >> 24) & 0xff);
175196
}
197+
#endif
176198
last = s;
177-
}
199+
} /* end if (state == UTF8_ACCEPT) */
178200
#endif
179201

180202
if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {

0 commit comments

Comments
 (0)