|
| 1 | +const {Blob} = require('buffer'); |
| 2 | +const fs = require('fs'); |
| 3 | +const https = require('https'); |
| 4 | +const {JSDOM} = require('jsdom'); |
| 5 | + |
| 6 | + |
// Read the compiled wasm up front and publish it, together with the
// start-up callback, on a global `Module` object. This has to happen before
// the worker script below is evaluated.
const wasmBinary = fs.readFileSync('./bergamot-translator-worker.wasm');
global.Module = {wasmBinary, onRuntimeInitialized};

// Execute bergamot-translator-worker.js as global code (indirect eval).
const workerSource = fs.readFileSync('./bergamot-translator-worker.js', 'utf8');
(0, eval)(workerSource);
| 16 | + |
/**
 * Helper to download a file into an ArrayBuffer.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @returns {Promise<ArrayBuffer>} Resolves with the complete response body.
 *   Rejects when the request or response emits an error, or when the server
 *   answers with a non-2xx status code.
 */
function download(url) {
  return new Promise((accept, reject) => {
    const request = https.get(url, (res) => {
      // An error page must not be handed on as if it were the requested file.
      if (res.statusCode < 200 || res.statusCode >= 300) {
        res.resume(); // Discard the body so the socket can be released.
        reject(new Error(`Failed to download ${url}: HTTP ${res.statusCode}`));
        return;
      }

      const chunks = [];
      res.on('error', reject);
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        new Blob(chunks).arrayBuffer().then(accept, reject);
      });
    });

    // Connection-level failures (DNS, refused connection, TLS) are emitted on
    // the request object, not on the response.
    request.on('error', reject);
  });
}
| 33 | + |
/**
 * Copies the contents of an ArrayBuffer into a newly allocated
 * Module.AlignedMemory.
 *
 * @param {ArrayBuffer} buffer - Raw bytes to copy.
 * @param {number} alignment - Alignment passed through to the
 *   AlignedMemory constructor.
 * @returns {object} The AlignedMemory instance holding a copy of the bytes.
 */
function load(buffer, alignment) {
  const source = new Int8Array(buffer);
  const aligned = new Module.AlignedMemory(source.byteLength, alignment);
  const view = aligned.getByteArrayView();
  view.set(source);
  return aligned;
}
| 43 | + |
/**
 * Called from inside the worker.js script once the wasm module is loaded
 * and all the emscripten magic and linking has been done.
 *
 * Downloads the `ende` model files, translates a small two-paragraph HTML
 * snippet through the BlockingService API, and asserts that the translated
 * HTML still contains exactly two top-level elements.
 *
 * @returns {Promise<void>} Rejects if a download fails, the translation
 *   throws, or the assertion on the translated HTML does not hold.
 */
async function onRuntimeInitialized() {
  // `assert` is not a Node.js global; without this require the check at the
  // end would throw a ReferenceError instead of asserting anything.
  const assert = require('assert');

  // Root url for our models for now.
  const root = 'https://storage.googleapis.com/bergamot-models-sandbox/0.2.14';

  // In order of TranslationModel's arguments
  const files = [
    {url: `${root}/ende/model.ende.intgemm.alphas.bin`, alignment: 256},
    {url: `${root}/ende/lex.50.50.ende.s2t.bin`, alignment: 64},
    {url: `${root}/ende/vocab.deen.spm`, alignment: 64},
  ];

  // Download model data (in parallel) and load it into aligned memory
  const [modelMem, shortlistMem, vocabMem] = await Promise.all(
    files.map(async ({url, alignment}) => load(await download(url), alignment)),
  );

  // Config yaml (split as array to allow for indentation without adding tabs
  // or spaces to the strings themselves.)
  const config = [
    'beam-size: 1',
    'normalize: 1.0',
    'word-penalty: 0',
    'alignment: soft',
    'max-length-break: 128',
    'mini-batch-words: 1024',
    'workspace: 128',
    'max-length-factor: 2.0',
    'skip-cost: true',
    'cpu-threads: 0',
    'quiet: true',
    'quiet-translation: true',
    'gemm-precision: int8shiftAll',
  ].join('\n');

  // Set up translation service
  const service = new Module.BlockingService({cacheSize: 0});

  // Put vocab into its own std::vector<AlignedMemory>
  const vocabs = new Module.AlignedMemoryList();
  vocabs.push_back(vocabMem);

  // Set up model with config yaml and AlignedMemory objects
  const model = new Module.TranslationModel(config, modelMem, shortlistMem, vocabs, /*qualityModel=*/ null);

  // std::vector<std::string> inputs and std::vector<ResponseOptions>
  const input = new Module.VectorString();
  const options = new Module.VectorResponseOptions();
  let output;

  try {
    input.push_back('<p> Hello world! </p> <p> Goodbye World! </p>');
    options.push_back({qualityScores: false, alignment: true, html: true});

    // Translate our batch (of 1)
    output = service.translate(model, input, options);

    // Get output from std::vector<Response>
    // The following works as a simple black-box test of the API, based on
    // properties of HTML.
    const translation = output.get(0).getTranslatedText();

    // Print raw translation for inspection.
    console.log(translation);

    const fragment = JSDOM.fragment(translation);

    // Print two expected tags. Optional chaining keeps an empty fragment from
    // throwing a TypeError here, so the assertion below reports the failure.
    console.log(fragment.firstElementChild?.outerHTML);
    console.log(fragment.lastElementChild?.outerHTML);

    // Assertion that there are two children at the output.
    assert.strictEqual(
      fragment.childElementCount,
      2,
      'expected the translated HTML to contain exactly two top-level elements',
    );
  } finally {
    // Clean-up: release the vectors created above even when the translation
    // or the assertion throws.
    input.delete();
    options.delete();
    if (output) {
      output.delete();
    }
  }
}
0 commit comments