Commit b8719b1
Ensure WASM fallback does not crash in GH actions (#402)
* Ensure WASM fallback does not crash in GH actions
* Add unit test for WordPiece `max_input_chars_per_word`
* Cleanup
* Set max test concurrency to 1
1 parent: 19daf2d

5 files changed: +21 −9 lines

package.json

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
   "dev": "webpack serve --no-client-overlay",
   "build": "webpack && npm run typegen",
   "generate-tests": "python -m tests.generate_tests",
-  "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose",
+  "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose --maxConcurrency 1",
   "readme": "python ./docs/scripts/build_readme.py",
   "docs-api": "node ./docs/scripts/generate.js",
   "docs-preview": "doc-builder preview transformers.js ./docs/source/ --not_python_module",

tests/generation.test.js

Lines changed: 2 additions & 2 deletions
@@ -9,8 +9,8 @@ describe('Generation parameters', () => {

     // List all models which will be tested
     const models = [
-        'Xenova/LaMini-Flan-T5-77M', // encoder-decoder
-        'Xenova/LaMini-GPT-124M', // decoder-only
+        'MBZUAI/LaMini-Flan-T5-77M', // encoder-decoder
+        'MBZUAI/LaMini-GPT-124M', // decoder-only
     ];

     // encoder-decoder model

tests/init.js

Lines changed: 6 additions & 0 deletions
@@ -9,6 +9,12 @@ import { onnxruntimeBackend } from "onnxruntime-node/dist/backend";
 import ONNX_COMMON from "onnxruntime-common";

 export function init() {
+    // In rare cases (specifically when running unit tests with GitHub actions), possibly due to
+    // a large number of concurrent executions, onnxruntime might fall back to the WASM backend.
+    // In this case, we set the number of threads to 1 to avoid errors like:
+    // - `TypeError: The worker script or module filename must be an absolute path or a relative path starting with './' or '../'. Received "blob:nodedata:..."`
+    ONNX_COMMON.env.wasm.numThreads = 1;
+
     // A workaround to define a new backend for onnxruntime, which
     // will not throw an error when running tests with jest.
     // For more information, see: https://github.com/jestjs/jest/issues/11864#issuecomment-1261468011
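For context: the quoted TypeError arises because multi-threaded WASM execution spawns worker threads from generated blob: URLs, which Node's worker_threads module (and hence Jest) rejects; with a single thread, no worker is created at all. A minimal sketch of applying the same workaround in one's own Jest setup file, assuming `onnxruntime-common` is available as in the diff above:

// Test setup file (a sketch mirroring the change above, not part of this commit).
import ONNX_COMMON from 'onnxruntime-common';

// One thread means no worker is ever spawned from a blob: URL,
// so the TypeError quoted above cannot occur.
ONNX_COMMON.env.wasm.numThreads = 1;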

tests/tensor.test.js

Lines changed: 1 addition & 4 deletions
@@ -1,9 +1,6 @@

-import { AutoProcessor, Tensor } from '../src/transformers.js';
-
-import { MAX_TEST_EXECUTION_TIME, m } from './init.js';
+import { Tensor } from '../src/transformers.js';
 import { compare } from './test_utils.js';
-
 import { cat, mean, stack } from '../src/utils/tensor.js';

 describe('Tensor operations', () => {

tests/tokenizers.test.js

Lines changed: 11 additions & 2 deletions
@@ -3,6 +3,7 @@
 import { AutoTokenizer } from '../src/transformers.js';
 import { getFile } from '../src/utils/hub.js';
 import { m, MAX_TEST_EXECUTION_TIME } from './init.js';
+import { compare } from './test_utils.js';

 // Load test data generated by the python tests
 // TODO do this dynamically?
@@ -41,10 +42,18 @@ describe('Tokenizers', () => {

     describe('Edge cases', () => {
         it('should not crash when encoding a very long string', async () => {
-            let tokenizer = await AutoTokenizer.from_pretrained('t5-small');
+            let tokenizer = await AutoTokenizer.from_pretrained('Xenova/t5-small');

             let text = String.prototype.repeat.call('Hello world! ', 50000);
-            let encoded = await tokenizer(text);
+            let encoded = tokenizer(text);
             expect(encoded.input_ids.data.length).toBeGreaterThan(100000);
         }, MAX_TEST_EXECUTION_TIME);
+
+        it('should not take too long', async () => {
+            let tokenizer = await AutoTokenizer.from_pretrained('Xenova/all-MiniLM-L6-v2');
+
+            let text = String.prototype.repeat.call('a', 50000);
+            let token_ids = tokenizer.encode(text);
+            compare(token_ids, [101, 100, 102])
+        }, 5000); // NOTE: 5 seconds
     });
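Why `[101, 100, 102]` is the expected output: `'a'.repeat(50000)` is a single word longer than WordPiece's `max_input_chars_per_word` limit (100 by default in BERT-style tokenizers), so the whole word collapses to the unknown token `[UNK]` (id 100) instead of being sub-word tokenized, with `[CLS]` (101) and `[SEP]` (102) added around it. A minimal sketch of that guard (an illustration, not the library's actual implementation):

// Sketch of the WordPiece guard exercised by the new test.
function wordpieceEncodeWord(word, vocab, { unkToken = '[UNK]', maxInputCharsPerWord = 100 } = {}) {
    // Words longer than the limit map straight to the unknown token,
    // keeping encoding time bounded for pathological inputs.
    if (word.length > maxInputCharsPerWord) {
        return [unkToken];
    }
    // Greedy longest-match-first sub-word tokenization.
    const tokens = [];
    let start = 0;
    while (start < word.length) {
        let end = word.length;
        let match = null;
        while (start < end) {
            const piece = (start > 0 ? '##' : '') + word.slice(start, end);
            if (vocab.has(piece)) { match = piece; break; }
            --end;
        }
        if (match === null) return [unkToken];
        tokens.push(match);
        start = end;
    }
    return tokens;
}

// 'a'.repeat(50000) exceeds the limit and becomes a single [UNK]:
console.log(wordpieceEncodeWord('a'.repeat(50000), new Set(['a']))); // ['[UNK]']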
