Skip to content

Commit 8771078

Browse files
authored
Basic HTML property testing for WebAssembly (#425)
Import https://gist.github.com/jelmervdl/a4c8b6b92ad88a885e1cbd51c6ad4902 and attach it to CI. NodeJS-14 fails when trying to use the WebAssembly binary, so we use an independently set-up node-16 instead. This paves the way for more complicated testing of the WebAssembly bindings in the future.
1 parent 61d2c35 commit 8771078

File tree

2 files changed

+156
-44
lines changed

2 files changed

+156
-44
lines changed

.github/workflows/build.yml

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -232,16 +232,15 @@ jobs:
232232
ccache -s # Print current cache stats
233233
ccache -z # Zero cache entry
234234
235-
# WORMHOLE=off
236-
- name: "Configure builds for WORMHOLE=off"
235+
- name: "Configure builds"
237236
run: |
238-
mkdir -p build-wasm-without-wormhole
239-
cd build-wasm-without-wormhole
237+
mkdir -p build-wasm
238+
cd build-wasm
240239
emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off ..
241240
242241
243-
- name: "Compile with WORMHOLE=off"
244-
working-directory: build-wasm-without-wormhole
242+
- name: "Compile"
243+
working-directory: build-wasm
245244
run: |
246245
emmake make -j2
247246
@@ -250,43 +249,24 @@ jobs:
250249
ccache -s # Print current cache stats
251250
252251
- name: Import GEMM library from a separate wasm module
253-
working-directory: build-wasm-without-wormhole
252+
working-directory: build-wasm
254253
run: bash ../wasm/patch-artifacts-import-gemm-module.sh
255254

255+
# Setup nodejs-16, as nodejs-14 provided by emsdk fails when running.
256+
- name: Setup nodejs
257+
uses: actions/setup-node@v3
258+
with:
259+
node-version: 16
256260

257-
# WORMHOLE=on
258-
- name: "Configure builds for WORMHOLE=on"
259-
run: |
260-
mkdir -p build-wasm-with-wormhole
261-
cd build-wasm-with-wormhole
262-
emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=on ..
263-
264-
265-
- name: "Compile with WORMHOLE=on"
266-
working-directory: build-wasm-with-wormhole
267-
run: |
268-
emmake make -j2
269-
270-
- name: ccache epilog
271-
run: |
272-
ccache -s # Print current cache stats
273-
274-
- name: Instantiate simd wormhole
275-
working-directory: build-wasm-with-wormhole
276-
run: bash ../wasm/patch-artifacts-enable-wormhole.sh
277-
278-
- name: Import GEMM library from a separate wasm module
279-
working-directory: build-wasm-with-wormhole
280-
run: bash ../wasm/patch-artifacts-import-gemm-module.sh
281-
282-
# Rename the wormhole on builds
283-
- name: Rename artefacts with wormhole
284-
working-directory: build-wasm-with-wormhole
261+
- name: Test run
262+
working-directory: wasm
285263
run: |
286-
mv bergamot-translator-worker{,-with-wormhole}.js
287-
mv bergamot-translator-worker{,-with-wormhole}.js.bak
288-
mv bergamot-translator-worker{,-with-wormhole}.wasm
264+
cp ../build-wasm/bergamot-translator-worker.{js,wasm} ./
265+
npm install jsdom
289266
267+
# --unhandled-rejections=strict makes the script exit with a non-zero code (at least on node-14).
268+
# So leaving this here.
269+
node --unhandled-rejections=strict node-test.js
290270
291271
# Upload both together.
292272
- name: Upload wasm artifact
@@ -296,13 +276,10 @@ jobs:
296276
if-no-files-found: error
297277
path: |
298278
# Without wormhole
299-
${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.js
300-
${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.wasm
301-
${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.js.bak
279+
${{github.workspace}}/build-wasm/bergamot-translator-worker.js
280+
${{github.workspace}}/build-wasm/bergamot-translator-worker.wasm
281+
${{github.workspace}}/build-wasm/bergamot-translator-worker.js.bak
302282
303-
${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.js
304-
${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.wasm
305-
${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.js.bak
306283
307284
# Try to upload a release using https://github.com/marvinpinto/actions/issues/177#issuecomment-917605585 as a model
308285
release-latest:
@@ -313,6 +290,11 @@ jobs:
313290
steps:
314291
- name: Download artifacts
315292
uses: actions/download-artifact@v2
293+
294+
# Leave the below be, it will be useful.
295+
- name: List downloaded assets
296+
run: |
297+
find ./
316298
317299
- name: Update GitHub prerelease
318300
uses: marvinpinto/action-automatic-releases@latest
@@ -338,6 +320,11 @@ jobs:
338320
steps:
339321
- name: Download artifacts
340322
uses: actions/download-artifact@v2
323+
324+
# Leave the below be, it will be useful.
325+
- name: List downloaded assets
326+
run: |
327+
find ./
341328
342329
- name: Update GitHub release
343330
uses: marvinpinto/action-automatic-releases@latest

wasm/node-test.js

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
const {Blob} = require('buffer');
2+
const fs = require('fs');
3+
const https = require('https');
4+
const {JSDOM} = require('jsdom');
5+
6+
7+
const wasmBinary = fs.readFileSync('./bergamot-translator-worker.wasm');
8+
// `onRuntimeInitialized` is a function declaration further down in this file,
// so it is hoisted and can be referenced here before its definition.
global.Module = {
9+
wasmBinary,
10+
onRuntimeInitialized
11+
};
12+
13+
// Execute bergamot-translator-worker.js in the global scope
14+
const js = fs.readFileSync('./bergamot-translator-worker.js', {encoding: 'utf8'});
15+
// Calling eval through .call makes it an indirect eval, so the script is
// evaluated in the global scope rather than in this module's local scope.
eval.call(global, js);
16+
17+
/**
 * Helper to download a file over HTTPS into an ArrayBuffer.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @returns {Promise<ArrayBuffer>} Resolves with the complete response body.
 *   Rejects when the request cannot be made, when the connection or the
 *   response stream errors, or when the server answers with a non-2xx status.
 */
function download(url) {
  return new Promise((accept, reject) => {
    const request = https.get(url, (res) => {
      const {statusCode} = res;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the socket is released, then fail loudly instead
        // of handing an error page to the caller as if it were file data.
        res.resume();
        reject(new Error(`Download of ${url} failed with HTTP status ${statusCode}`));
        return;
      }

      const chunks = [];
      res.on('error', reject);
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        // Blob concatenates the chunks; arrayBuffer() resolves with the bytes.
        new Blob(chunks).arrayBuffer().then(accept, reject);
      });
    });

    // Connection-level failures (DNS, refused connection, TLS) are emitted on
    // the request object, not on the response, so they need their own handler.
    request.on('error', reject);
  });
}
33+
34+
/**
 * Copies the bytes of an ArrayBuffer into a newly allocated
 * Module.AlignedMemory of the same byte length and returns that memory.
 */
function load(buffer, alignment) {
  const source = new Int8Array(buffer);
  const aligned = new Module.AlignedMemory(source.byteLength, alignment);
  const view = aligned.getByteArrayView();
  view.set(source);
  return aligned;
}
43+
44+
/**
 * Called from inside the worker.js script once the wasm module is loaded
 * and all the emscripten magic and linking has been done.
 *
 * Downloads the `ende` model files, translates a small HTML snippet and checks
 * that the translated markup contains exactly two top-level elements.
 *
 * @returns {Promise<void>} Rejects when a download fails or the check fails.
 */
async function onRuntimeInitialized() {
  // Root url for our models for now.
  const root = 'https://storage.googleapis.com/bergamot-models-sandbox/0.2.14';

  // Listed in the order the buffers are handed to TranslationModel below.
  const files = [
    {url: `${root}/ende/model.ende.intgemm.alphas.bin`, alignment: 256},
    {url: `${root}/ende/lex.50.50.ende.s2t.bin`, alignment: 64},
    {url: `${root}/ende/vocab.deen.spm`, alignment: 64},
  ];

  // Download model data and load it into aligned memory
  const [modelMem, shortlistMem, vocabMem] = await Promise.all(files.map(async (file) => {
    return load(await download(file.url), file.alignment);
  }));

  // Config yaml (split as array to allow for indentation without adding tabs
  // or spaces to the strings themselves.)
  const config = [
    'beam-size: 1',
    'normalize: 1.0',
    'word-penalty: 0',
    'alignment: soft',
    'max-length-break: 128',
    'mini-batch-words: 1024',
    'workspace: 128',
    'max-length-factor: 2.0',
    'skip-cost: true',
    'cpu-threads: 0',
    'quiet: true',
    'quiet-translation: true',
    'gemm-precision: int8shiftAll',
  ].join('\n');

  // Set up translation service
  const service = new Module.BlockingService({cacheSize: 0});

  // Put vocab into its own std::vector<AlignedMemory>
  const vocabs = new Module.AlignedMemoryList();
  vocabs.push_back(vocabMem);

  // Set up model with config yaml and AlignedMemory objects
  const model = new Module.TranslationModel(config, modelMem, shortlistMem, vocabs, /*qualityModel=*/ null);

  // std::vector<std::string> inputs and std::vector<ResponseOptions>
  const input = new Module.VectorString();
  const options = new Module.VectorResponseOptions();
  let output = null;

  try {
    input.push_back('<p> Hello world! </p> <p> Goodbye World! </p>');
    options.push_back({qualityScores: false, alignment: true, html: true});

    // Translate our batch (of 1)
    output = service.translate(model, input, options);

    // Get output from std::vector<Response>
    // The following works as a simple black-box test of the API, based on
    // properties of HTML.
    const translation = output.get(0).getTranslatedText();

    // Print raw translation for inspection.
    console.log(translation);

    const fragment = JSDOM.fragment(translation);

    // Check that there are two children at the output. This is done with an
    // explicit throw because this file does not import an `assert` function,
    // and before the element accesses below so that a translation without any
    // elements reports the count instead of a TypeError.
    const elementCount = fragment.childElementCount;
    if (elementCount !== 2) {
      throw new Error(`Expected 2 top-level elements in the translation, found ${elementCount}`);
    }

    // Print two expected tags.
    console.log(fragment.firstElementChild.outerHTML);
    console.log(fragment.lastElementChild.outerHTML);
  } finally {
    // Clean-up runs even when translation or the check above throws.
    input.delete();
    options.delete();
    if (output !== null) {
      output.delete();
    }
  }
}

0 commit comments

Comments
 (0)