Skip to content

Commit 30167d8

Browse files
committed
Added SSE intrinsics to scanner.
The gain seems to be measurable only on rather long messages.
1 parent 3cfcb0b commit 30167d8

File tree

3 files changed

+39
-8
lines changed

3 files changed

+39
-8
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ CC = gcc -std=c11
1717

1818
release release32 : CFLAGS = -O3 -Wall -Wextra -Iinclude -march=native -mtune=native \
1919
-fomit-frame-pointer -Wl,--as-needed -flto -ffunction-sections -fdata-sections -Wl,--gc-sections \
20-
-DNDEBUG -DRELEASE
20+
-DNDEBUG -DRELEASE -DUSE_SSE
2121

2222
debug : CFLAGS = -g -Wall -Wextra -Iinclude -DDEBUG
2323

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ _OS:_ Linux Mint 17.2 64bit
2626

2727
FIX message type | FIX specification | Validation | Average time to parse one message
2828
----------------------------------|------------------------------------------|------------|--------------------------------------------------
29-
NewOrderSingle('D') | Hand-coded spec. for this message only | No | 0.328 µs/msg
30-
NewOrderSingle('D') | Hand-coded spec. for this message only | Yes | 0.561 µs/msg
31-
NewOrderSingle('D') | Compiled full spec. for FIX.4.4 | Yes | 0.734 µs/msg
32-
MarketDataIncrementalRefresh('X') | Hand-coded spec. for this message only | Yes | 1.280 µs/msg
33-
MarketDataIncrementalRefresh('X') | Compiled full spec. for FIX.4.4 | Yes | 1.418 µs/msg
29+
NewOrderSingle('D') | Hand-coded spec. for this message only | No | 0.338 µs/msg
30+
NewOrderSingle('D') | Hand-coded spec. for this message only | Yes | 0.550 µs/msg
31+
NewOrderSingle('D') | Compiled full spec. for FIX.4.4 | Yes | 0.765 µs/msg
32+
MarketDataIncrementalRefresh('X') | Hand-coded spec. for this message only | Yes | 1.245 µs/msg
33+
MarketDataIncrementalRefresh('X') | Compiled full spec. for FIX.4.4 | Yes | 1.392 µs/msg
3434

3535
For more details see `doc/` directory of the project.

src/scanner.c

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3030

3131
#include "fix_impl.h"
3232

33+
#ifdef USE_SSE
34+
#include <xmmintrin.h>
35+
#endif
36+
3337
// message buffer handling
3438
static
3539
char* make_space(fix_parser* const parser, char* dest, unsigned extra_len)
@@ -103,9 +107,36 @@ bool copy_chunk(scanner_state* const state)
103107
static
104108
unsigned char copy_cs(char* restrict dest, const char* restrict src, unsigned n)
105109
{
106-
unsigned char cs = (*dest++ = *src++);
110+
unsigned char cs = 0;
111+
112+
#ifdef USE_SSE
113+
if(n >= sizeof(__m128i))
114+
{
115+
__m128i cs128 = _mm_loadu_si128((const __m128i*)src);
116+
117+
src += sizeof(__m128i);
118+
_mm_storeu_si128((__m128i*)dest, cs128);
119+
dest += sizeof(__m128i);
120+
121+
while((n -= sizeof(__m128i)) >= sizeof(__m128i))
122+
{
123+
const __m128i tmp = _mm_loadu_si128((const __m128i*)src);
124+
125+
src += sizeof(__m128i);
126+
_mm_storeu_si128((__m128i*)dest, tmp);
127+
dest += sizeof(__m128i);
128+
cs128 = _mm_add_epi8(cs128, tmp);
129+
}
130+
131+
cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 8));
132+
cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 4));
133+
cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 2));
134+
cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 1));
135+
cs += _mm_extract_epi16(cs128, 0); // SSE4: _mm_extract_epi8 ?
136+
}
137+
#endif // #ifdef USE_SSE
107138

108-
while(--n > 0)
139+
while(n-- > 0)
109140
cs += (*dest++ = *src++);
110141

111142
return cs;

0 commit comments

Comments
 (0)