// Patch summary (llparse 7.1.1 -> 7.2.0): adds an SSSE3 fast path to the C TableLookup node.
//  * package.json: bumps the version and @types/node, adds z3-solver and synckit.
//  * compilation.ts: adds SseFamily and stores on every blob the family used for its
//    `#ifdef __<family>__` / `#endif` guard (blob() defaults it to SSE4_2).
//  * table-lookup.ts: buildSSSE3() asks a synckit worker for two 16-entry tables and emits a
//    _mm_shuffle_epi8 loop over 16-byte chunks; buildSSE() tries it before buildSSE42().
//  * z3-lookup-solver.ts: worker that solves for TLO/THI such that
//    TLO[b & 0xf] & THI[b >> 4] is non-zero exactly for the bytes marked in the input table.
// Fixes applied to the added lines in this revision (hunk line counts are unchanged):
//  * blob(): SseFamily is a string enum, so `>` compared the names lexicographically
//    ('SSSE3' > 'SSE4_2') and a blob requested by both families always ended up behind the
//    __SSE4_2__ guard. A blob requested by SSSE3 code is now always guarded by __SSSE3__
//    (assumes a compiler that defines __SSE4_2__ also defines __SSSE3__ - true for GCC/Clang).
//  * buildSSSE3(): the emitted C advances ctx.posArg() instead of a hard-coded `p`.
//  * z3 worker: the parameter is typed number[]; any solver result other than 'sat'
//    returns null instead of asking for a model.
// NOTE(review): `// return false;` in buildSSSE3() and out.push('// ${}') in buildSSE42() look
// like debugging leftovers - confirm and drop them before merging.
// NOTE(review): this copy of the patch has its line breaks and indentation collapsed, and
// generic type arguments appear to be missing (e.g. `frontend.IWrap;`) - re-export the patch
// from the repository before applying it.
diff --git a/package.json b/package.json index 344153e..4d0eef5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "llparse", - "version": "7.1.1", + "version": "7.2.0", "description": "Compile incremental parsers to C code", "main": "lib/api.js", "types": "lib/api.d.ts", @@ -34,7 +34,7 @@ "devDependencies": { "@types/debug": "^4.1.5", "@types/mocha": "^8.0.3", - "@types/node": "^14.11.8", + "@types/node": "^14.14.14", "esm": "^3.2.25", "llparse-test-fixture": "^5.0.1", "mocha": "^9.2.2", @@ -44,6 +44,8 @@ }, "dependencies": { "debug": "^4.2.0", - "llparse-frontend": "^3.0.0" + "llparse-frontend": "^3.0.0", + "z3-solver": "^4.12.2", + "synckit": "^0.8.5" } } diff --git a/src/implementation/c/compilation.ts b/src/implementation/c/compilation.ts index 4df05a6..ae0fe77 100644 --- a/src/implementation/c/compilation.ts +++ b/src/implementation/c/compilation.ts @@ -19,10 +19,17 @@ const BLOB_GROUP_SIZE = 11; type WrappedNode = frontend.IWrap; +// The SSE versions in use with the generator. 
+export enum SseFamily { + SSSE3 = 'SSSE3', + SSE4_2 = 'SSE4_2', +} + interface IBlob { readonly alignment: number | undefined; readonly buffer: Buffer; readonly name: string; + sseFamily: SseFamily; } // TODO(indutny): deduplicate @@ -78,7 +85,7 @@ export class Compilation { } if (blob.alignment) { - out.push('#ifdef __SSE4_2__'); + out.push(`#ifdef __${blob.sseFamily.toString()}__`); } out.push(`static const unsigned char${align} ${blob.name}[] = {`); @@ -107,7 +114,7 @@ export class Compilation { out.push(`};`); if (blob.alignment) { - out.push('#endif /* __SSE4_2__ */'); + out.push(`#endif /* __${blob.sseFamily.toString()}__ */`); } } out.push(''); @@ -320,9 +327,17 @@ export class Compilation { return JSON.stringify(value); } - public blob(value: Buffer, alignment?: number): string { + public blob(value: Buffer, alignment?: number, sseFamily?: SseFamily): string { + if(!sseFamily) { + sseFamily = SseFamily.SSE4_2 + } if (this.blobs.has(value)) { - return this.blobs.get(value)!.name; + let b = this.blobs.get(value)!; + if( sseFamily === SseFamily.SSSE3 ) { + b.sseFamily = sseFamily; + } + + return b.name; } const res = BLOB_PREFIX + this.blobs.size; @@ -330,6 +345,7 @@ export class Compilation { alignment, buffer: value, name: res, + sseFamily: sseFamily, }); return res; } diff --git a/src/implementation/c/node/table-lookup.ts b/src/implementation/c/node/table-lookup.ts index 6a400a3..84e2892 100644 --- a/src/implementation/c/node/table-lookup.ts +++ b/src/implementation/c/node/table-lookup.ts @@ -1,7 +1,8 @@ import * as assert from 'assert'; import * as frontend from 'llparse-frontend'; +import { createSyncFn } from 'synckit' -import { Compilation } from '../compilation'; +import { Compilation, SseFamily } from '../compilation'; import { Node } from './base'; const MAX_CHAR = 0xff; @@ -65,7 +66,86 @@ export class TableLookup extends Node { out.push('}'); } - private buildSSE(out: string[]): boolean { + private buildSSSE3(out: string[]): boolean { + // return false; 
+ const ctx = this.compilation; + + // Transformation is not supported atm + if (this.ref.transform && this.ref.transform.ref.name !== 'id') { + return false; + } + + if (this.ref.edges.length !== 1) { + return false; + } + + const edge = this.ref.edges[0]; + if (edge.node.ref !== this.ref) { + return false; + } + + let initial_lut = new Array(256).fill(0); + edge.keys.forEach(i => { initial_lut[i] = 1;}); + + // the worker path must be absolute + const lutLowNibbleHighNibbleResolver = createSyncFn(require.resolve('./z3-lookup-solver'), { + tsRunner: 'ts-node', // optional, can be `'ts-node' | 'esbuild-register' | 'esbuild-runner' | 'tsx'` + }) + + // do whatever you want, you will get the result synchronously! + const result = lutLowNibbleHighNibbleResolver(initial_lut) + + if (!result) { + return false; + } + + const blob1 = ctx.blob(Buffer.from(result[0]), SSE_ALIGNMENT, SseFamily.SSSE3); + const blob2 = ctx.blob(Buffer.from(result[1]), SSE_ALIGNMENT, SseFamily.SSSE3); + + out.push('#ifdef __SSSE3__'); + out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`); + out.push(' int index;'); + out.push(' __m128i lut_tlo;'); + out.push(' __m128i lut_thi;'); + out.push(` lut_tlo = _mm_load_si128((__m128i const*) ${blob1});`); + out.push(` lut_thi = _mm_load_si128((__m128i const*) ${blob2});`); + out.push(''); + out.push(` for( ;${ctx.endPosArg()} - ${ctx.posArg()} >= 16; ${ctx.posArg()} += 16) {`); + + out.push(' __m128i lut_res_lo;'); + out.push(' __m128i lut_res_hi;'); + out.push(' __m128i input;'); + out.push(` input = _mm_loadu_si128((__m128i const*) ${ctx.posArg()});`); + out.push(''); + out.push(' lut_res_lo = _mm_shuffle_epi8(lut_tlo, _mm_and_si128(input, _mm_set1_epi8(0x0F)));'); + out.push(' lut_res_hi = _mm_shuffle_epi8(lut_thi, _mm_srli_epi16(_mm_and_si128(input, _mm_set1_epi8(0xF0)), 4));'); + out.push(''); + out.push(' input = _mm_cmpeq_epi8(_mm_and_si128(lut_res_lo, lut_res_hi), _mm_setzero_si128());'); + out.push(' index = 
_mm_movemask_epi8(input);'); + out.push(' if( 0 != index )'); + out.push(' {'); + out.push(` ${ctx.posArg()} += __builtin_ctz(index);`); + { + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + } + out.push(' }'); + + out.push(' }'); + const tmp: string[] = []; + assert.strictEqual(edge.noAdvance, false); + this.tailTo(tmp, { + noAdvance: true, + node: edge.node, + }); + ctx.indent(out, tmp, ' '); + out.push('}'); + out.push('#endif /* __SSSE3__ */'); + return true; + } + + private buildSSE42(out: string[]): boolean { const ctx = this.compilation; // Transformation is not supported atm @@ -114,6 +194,7 @@ export class TableLookup extends Node { } out.push('#ifdef __SSE4_2__'); + out.push('// ${}'); out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`); out.push(' __m128i ranges;'); out.push(' __m128i input;'); @@ -166,6 +247,13 @@ export class TableLookup extends Node { return true; } + private buildSSE(out: string[]): boolean { + if (this.buildSSSE3(out)){ + return true; + } + return this.buildSSE42(out); + } + private buildTable(): ITable { const table: number[] = new Array(MAX_CHAR + 1).fill(0); diff --git a/src/implementation/c/node/z3-lookup-solver.ts b/src/implementation/c/node/z3-lookup-solver.ts new file mode 100644 index 0000000..9f63ab8 --- /dev/null +++ b/src/implementation/c/node/z3-lookup-solver.ts @@ -0,0 +1,40 @@ +const z3 = require('z3-solver'); +import { runAsWorker } from 'synckit' + +runAsWorker(async (byte_lookup_table: number[]) => { + const { Context } = await z3.init(); + const { BitVec, Solver, Int, Array, Select } = new Context('main'); + + const tlo = Array.const('TLO', Int.sort(), BitVec.sort(8)); + const thi = Array.const('THI', Int.sort(), BitVec.sort(8)); + const lut = Array.const('LUT', Int.sort(), BitVec.sort(8)); + + const solver = new Solver(); + + for (let i = 0; i < 256; i++) { + if (byte_lookup_table[i] > 0) { + solver.add(Select(lut, i).neq(BitVec.val(0, 8))); + } else { 
+ solver.add(Select(lut, i).eq(BitVec.val(0, 8))); + } + + solver.add(Select(tlo, i & 0xf).and(Select(thi, i >> 4)).eq(Select(lut, i))); + } + + const solved = await solver.check(); + if (solved !== 'sat') { + return null; + } + + const model = await solver.model(); + + let aa = []; + let bb = []; + + for (let i = 0; i < 16; i++) { + aa.push(Number(model.eval(Select(tlo, i)).value())); + bb.push(Number(model.eval(Select(thi, i)).value())); + } + + return [ aa, bb]; +}) \ No newline at end of file