|
1 | 1 | import * as assert from 'assert';
|
2 | 2 | import * as frontend from 'llparse-frontend';
|
| 3 | +import { createSyncFn } from 'synckit' |
3 | 4 |
|
4 |
| -import { Compilation } from '../compilation'; |
| 5 | +import { Compilation, SseFamily } from '../compilation'; |
5 | 6 | import { Node } from './base';
|
6 | 7 |
|
7 | 8 | const MAX_CHAR = 0xff;
|
@@ -65,7 +66,86 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
|
65 | 66 | out.push('}');
|
66 | 67 | }
|
67 | 68 |
|
68 |
| - private buildSSE(out: string[]): boolean { |
/**
 * Tries to emit a vectorized fast path that skips over runs of accepted
 * bytes 16 at a time using two 16-entry nibble lookup tables.
 *
 * The fast path is only generated when the node has no transform (other
 * than `id`) and exactly one edge that loops back to this very node.
 * The two tables are obtained synchronously from the `./z3-lookup-solver`
 * worker; a byte is treated as accepted when
 * `tableLo[byte & 0x0f] & tableHi[byte >> 4]` is non-zero.
 *
 * NOTE: despite the method name, the emitted code relies on
 * `_mm_shuffle_epi8`, which is an SSSE3 intrinsic, so the generated block
 * is guarded by `__SSSE3__`. The blobs are still registered under
 * `SseFamily.SSE2`; SSSE3 implies SSE2, so they are presumably visible
 * whenever the guarded code is compiled - TODO confirm against
 * `Compilation.blob`.
 *
 * @param out  Lines of generated C code; appended to only on success.
 * @returns `true` when the fast path was emitted, `false` when this node
 *          is not eligible or the solver produced no tables.
 */
private buildSSE2(out: string[]): boolean {
  const ctx = this.compilation;

  // Transformation is not supported atm
  if (this.ref.transform && this.ref.transform.ref.name !== 'id') {
    return false;
  }

  if (this.ref.edges.length !== 1) {
    return false;
  }

  // The only edge must be a self-loop.
  const edge = this.ref.edges[0];
  if (edge.node.ref !== this.ref) {
    return false;
  }

  // 256-entry membership map: 1 for every byte accepted by the edge.
  const initialLut = new Array<number>(256).fill(0);
  edge.keys.forEach((key) => {
    initialLut[key] = 1;
  });

  // The worker path must be absolute, hence `require.resolve`.
  const solveNibbleTables = createSyncFn(
    require.resolve('./z3-lookup-solver'),
    {
      // Optional; one of 'ts-node' | 'esbuild-register' | 'esbuild-runner' | 'tsx'.
      tsRunner: 'ts-node',
    },
  );

  // Blocks until the worker answers.
  const result = solveNibbleTables(initialLut);
  if (!result) {
    return false;
  }

  const blob1 = ctx.blob(Buffer.from(result[0]), SSE_ALIGNMENT, SseFamily.SSE2);
  const blob2 = ctx.blob(Buffer.from(result[1]), SSE_ALIGNMENT, SseFamily.SSE2);

  const pos = ctx.posArg();
  const endPos = ctx.endPosArg();

  // `_mm_shuffle_epi8` requires SSSE3, not just SSE2.
  out.push('#ifdef __SSSE3__');
  out.push(`if (${endPos} - ${pos} >= 16) {`);
  out.push(' int index;');
  out.push(' __m128i lut_tlo;');
  out.push(' __m128i lut_thi;');
  out.push(` lut_tlo = _mm_load_si128((__m128i const*) ${blob1});`);
  out.push(` lut_thi = _mm_load_si128((__m128i const*) ${blob2});`);
  out.push('');
  out.push(` for( ;${endPos} - ${pos} >= 16; ${pos} += 16) {`);

  out.push(' __m128i lut_res_lo;');
  out.push(' __m128i lut_res_hi;');
  out.push(' __m128i input;');
  out.push(` input = _mm_loadu_si128((__m128i const*) ${pos});`);
  out.push('');
  out.push(' lut_res_lo = _mm_shuffle_epi8(lut_tlo, _mm_and_si128(input, _mm_set1_epi8(0x0F)));');
  out.push(' lut_res_hi = _mm_shuffle_epi8(lut_thi, _mm_srli_epi16(_mm_and_si128(input, _mm_set1_epi8(0xF0)), 4));');
  out.push('');
  // A zero lane after the AND marks a byte that is NOT accepted.
  out.push(' input = _mm_cmpeq_epi8(_mm_and_si128(lut_res_lo, lut_res_hi), _mm_setzero_si128());');
  out.push(' index = _mm_movemask_epi8(input);');
  out.push(' if( 0 != index )');
  out.push(' {');
  // Advance to the first rejected byte. Use the real position argument
  // rather than a hard-coded `p`.
  out.push(` ${pos} += __builtin_ctz(index);`);
  {
    const otherwiseLines: string[] = [];
    this.tailTo(otherwiseLines, this.ref.otherwise!);
    ctx.indent(out, otherwiseLines, ' ');
  }
  out.push(' }');

  out.push(' }');

  // Fewer than 16 bytes left: re-enter this node without advancing so the
  // scalar path handles the remainder.
  const loopLines: string[] = [];
  assert.strictEqual(edge.noAdvance, false);
  this.tailTo(loopLines, {
    noAdvance: true,
    node: edge.node,
  });
  ctx.indent(out, loopLines, ' ');
  out.push('}');
  out.push('#endif /* __SSSE3__ */');
  return true;
}
| 147 | + |
| 148 | + private buildSSE42(out: string[]): boolean { |
69 | 149 | const ctx = this.compilation;
|
70 | 150 |
|
71 | 151 | // Transformation is not supported atm
|
@@ -114,6 +194,7 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
|
114 | 194 | }
|
115 | 195 |
|
116 | 196 | out.push('#ifdef __SSE4_2__');
|
| 197 | + out.push('// ${}'); |
117 | 198 | out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`);
|
118 | 199 | out.push(' __m128i ranges;');
|
119 | 200 | out.push(' __m128i input;');
|
@@ -166,6 +247,13 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
|
166 | 247 | return true;
|
167 | 248 | }
|
168 | 249 |
|
/**
 * Emits a vectorized fast path for this node, preferring the nibble-table
 * variant (`buildSSE2`) and falling back to `buildSSE42` when the former
 * declines.
 *
 * @param out  Lines of generated C code to append to.
 * @returns `true` when either variant emitted code.
 */
private buildSSE(out: string[]): boolean {
  // Short-circuit: `buildSSE42` runs only if `buildSSE2` returned false.
  return this.buildSSE2(out) || this.buildSSE42(out);
}
| 256 | + |
169 | 257 | private buildTable(): ITable {
|
170 | 258 | const table: number[] = new Array(MAX_CHAR + 1).fill(0);
|
171 | 259 |
|
|
0 commit comments