Skip to content

Commit ab57416

Browse files
ethercrow authored and Bodigrim committed
Use SSE2 in the x86_64 C version of encodeUtf8
1 parent 7ad8768 commit ab57416

File tree

3 files changed

+30
-22
lines changed

3 files changed

+30
-22
lines changed

benchmarks/haskell/Benchmarks.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ main = do
4545
, env (DecodeUtf8.initEnv (tf "russian.txt")) (DecodeUtf8.benchmark "russian")
4646
, env (DecodeUtf8.initEnv (tf "japanese.txt")) (DecodeUtf8.benchmark "japanese")
4747
, env (DecodeUtf8.initEnv (tf "ascii.txt")) (DecodeUtf8.benchmarkASCII)
48-
, EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
48+
, EncodeUtf8.benchmark "non-ASCII" "επανάληψη 竺法蘭共譯"
49+
, EncodeUtf8.benchmark "ASCII" "lorem ipsum"
4950
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
5051
, FileRead.benchmark (tf "russian.txt")
5152
, FoldLines.benchmark (tf "russian.txt")

benchmarks/haskell/Benchmarks/EncodeUtf8.hs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ import qualified Data.Text.Encoding as T
1818
import qualified Data.Text.Lazy as TL
1919
import qualified Data.Text.Lazy.Encoding as TL
2020

21-
benchmark :: String -> Benchmark
22-
benchmark string =
21+
benchmark :: String -> String -> Benchmark
22+
benchmark name string =
2323
bgroup "EncodeUtf8"
24-
[ bench "Text" $ whnf (B.length . T.encodeUtf8) text
25-
, bench "LazyText" $ whnf (BL.length . TL.encodeUtf8) lazyText
24+
[ bench ("Text (" ++ name ++ ")") $ whnf (B.length . T.encodeUtf8) text
25+
, bench ("LazyText (" ++ name ++ ")") $ whnf (BL.length . TL.encodeUtf8) lazyText
2626
]
2727
where
2828
-- The string in different formats

cbits/cbits.c

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -276,29 +276,36 @@ _hs_text_encode_utf8(uint8_t **destp, const uint16_t *src, size_t srcoff,
276276

277277
ascii:
278278
#if defined(__x86_64__)
279-
while (srcend - src >= 4) {
280-
uint64_t w = *((uint64_t *) src);
279+
while (srcend - src >= 8) {
280+
union { uint64_t halves[2]; __m128i whole; } eight_chars;
281+
eight_chars.whole = _mm_loadu_si128((__m128i *) src);
281282

283+
const uint64_t w = eight_chars.halves[0];
282284
if (w & 0xFF80FF80FF80FF80ULL) {
283285
if (!(w & 0x000000000000FF80ULL)) {
284-
*dest++ = w & 0xFFFF;
285-
src++;
286-
if (!(w & 0x00000000FF800000ULL)) {
287-
*dest++ = (w >> 16) & 0xFFFF;
288-
src++;
289-
if (!(w & 0x0000FF8000000000ULL)) {
290-
*dest++ = (w >> 32) & 0xFFFF;
291-
src++;
292-
}
293-
}
286+
*dest++ = w & 0xFFFF;
287+
src++;
288+
if (!(w & 0x00000000FF800000ULL)) {
289+
*dest++ = (w >> 16) & 0xFFFF;
290+
src++;
291+
if (!(w & 0x0000FF8000000000ULL)) {
292+
*dest++ = (w >> 32) & 0xFFFF;
293+
src++;
294+
}
295+
}
294296
}
295297
break;
296298
}
297-
*dest++ = w & 0xFFFF;
298-
*dest++ = (w >> 16) & 0xFFFF;
299-
*dest++ = (w >> 32) & 0xFFFF;
300-
*dest++ = w >> 48;
301-
src += 4;
299+
300+
if (eight_chars.halves[1] & 0xFF80FF80FF80FF80ULL) {
301+
break;
302+
}
303+
304+
const __m128i eight_ascii_chars = _mm_packus_epi16(eight_chars.whole, eight_chars.whole);
305+
_mm_storel_epi64((__m128i *)dest, eight_ascii_chars);
306+
307+
dest += 8;
308+
src += 8;
302309
}
303310
#endif
304311

0 commit comments

Comments
 (0)