Skip to content

Commit 63d9c3b

Browse files
authored
Improve unit test coverage (#1095)
* Improve unit test auto-detection * Use default model options * Add mgp_str unit tests * Add janus processing unit tests * Add jina_clip processor unit tests * Fix typo in filename * Create `rand` tensor function * Add VitPose unit test * Add sam modelling unit test * Improve pipeline unit tests * Add image utilities unit testing * Add image segmentation pipeline unit tests * Add zero-shot classification pipeline unit test * Move pipeline unit tests to subfolder * Add instanceof checks * Add image feature extraction pipeline tests * Add feature extraction pipeline unit tests * Add zero-shot object detection pipeline unit tests * Add depth estimation pipeline unit test * Add automatic speech recognition pipeline unit test * Fix typo * Add text to audio pipeline unit tests * Add image to text pipeline unit test * Add image to image pipeline unit test * Add zero-shot audio classification pipeline unit test * Fix typo * Add summarization pipeline unit test * Add text2text generation unit test * Add text2text generation pipeline unit test * Remove unused variables
1 parent effa9a9 commit 63d9c3b

File tree

55 files changed

+3099
-1459
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+3099
-1459
lines changed

src/utils/tensor.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,6 +1430,20 @@ export function zeros_like(tensor) {
14301430
return zeros(tensor.dims);
14311431
}
14321432

1433+
/**
 * Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1).
 *
 * NOTE: values come from `Math.random()`, so they are neither cryptographically
 * secure nor reproducible across runs (there is no seeding mechanism).
 * @param {number[]} size A sequence of integers defining the shape of the output tensor.
 * @returns {Tensor} The random tensor.
 */
export function rand(size) {
    // Total element count is the product of all dimensions; the initial value 1
    // means an empty `size` produces a single-element (scalar-shaped) tensor.
    const length = size.reduce((a, b) => a * b, 1);
    return new Tensor(
        "float32",
        Float32Array.from({ length }, () => Math.random()),
        size,
    );
}
1446+
14331447
/**
14341448
* Quantizes the embeddings tensor to binary or unsigned binary precision.
14351449
* @param {Tensor} tensor The tensor to quantize.

tests/asset_cache.js

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { RawImage } from "../src/transformers.js";
33
const BASE_URL = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/";
44
const TEST_IMAGES = Object.freeze({
55
white_image: BASE_URL + "white-image.png",
6+
blue_image: BASE_URL + "blue-image.png",
67
pattern_3x3: BASE_URL + "pattern_3x3.png",
78
pattern_3x5: BASE_URL + "pattern_3x5.png",
89
checkerboard_8x8: BASE_URL + "checkerboard_8x8.png",
@@ -21,8 +22,14 @@ const TEST_IMAGES = Object.freeze({
2122

2223
beetle: BASE_URL + "beetle.png",
2324
book_cover: BASE_URL + "book-cover.png",
25+
corgi: BASE_URL + "corgi.jpg",
26+
man_on_car: BASE_URL + "young-man-standing-and-leaning-on-car.jpg",
2427
});
2528

29+
// Remote audio fixtures used by the unit tests.
// Frozen for consistency with `TEST_IMAGES` above, so tests cannot mutate the registry.
const TEST_AUDIOS = Object.freeze({
  mlk: BASE_URL + "mlk.npy",
});
32+
2633
/** @type {Map<string, RawImage>} */
2734
const IMAGE_CACHE = new Map();
2835
const load_image = async (url) => {
@@ -35,9 +42,29 @@ const load_image = async (url) => {
3542
return image;
3643
};
3744

45+
/** @type {Map<string, Float32Array>} */
const AUDIO_CACHE = new Map();

/**
 * Fetch an audio asset and decode it as a Float32Array, memoizing the result by URL.
 * @param {string} url The URL of the audio file to load.
 * @returns {Promise<Float32Array>} The decoded audio samples.
 */
const load_audio = async (url) => {
  // Return the cached copy if this URL was already fetched.
  const cached = AUDIO_CACHE.get(url);
  if (cached) {
    return cached;
  }
  const buffer = await (await fetch(url)).arrayBuffer();
  // NOTE(review): the response bytes are reinterpreted directly as raw float64
  // samples, then downcast to float32. A standard `.npy` file begins with a
  // header, so this presumably relies on the hosted asset being raw float64
  // data (or on the header decoding to ignorable values) — confirm against
  // the actual asset.
  const audio = Float32Array.from(new Float64Array(buffer));
  AUDIO_CACHE.set(url, audio);
  return audio;
};
57+
3858
/**
3959
* Load a cached image.
4060
* @param {keyof typeof TEST_IMAGES} name The name of the image to load.
4161
* @returns {Promise<RawImage>} The loaded image.
4262
*/
4363
export const load_cached_image = (name) => load_image(TEST_IMAGES[name]);
64+
65+
/**
 * Retrieve an audio test fixture by name, downloading and caching it on first use.
 * @param {keyof typeof TEST_AUDIOS} key Key identifying the audio asset to load.
 * @returns {Promise<Float32Array>} The audio samples.
 */
export const load_cached_audio = (key) => load_audio(TEST_AUDIOS[key]);

tests/feature_extractors.test.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Entry point for the feature-extractor test suite: initialize the test
// environment, then collect and run the tests registered under the
// 'feature_extraction' group (discovery logic lives in test_utils.js).
import { init } from "./init.js";
import { collect_and_execute_tests } from "./test_utils.js";

init();
await collect_and_execute_tests("Feature extractors", "feature_extraction");

tests/image_processors.test.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Entry point for the image-processor test suite: initialize the test
// environment, then collect and run the tests registered under the
// 'image_processing' group (discovery logic lives in test_utils.js).
import { init } from "./init.js";
import { collect_and_execute_tests } from "./test_utils.js";

init();
await collect_and_execute_tests("Image processors", "image_processing");

tests/init.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,18 @@ export function init() {
5757
registerBackend("test", onnxruntimeBackend, Number.POSITIVE_INFINITY);
5858
}
5959

60+
export const MAX_TOKENIZER_LOAD_TIME = 10_000; // 10 seconds
61+
export const MAX_FEATURE_EXTRACTOR_LOAD_TIME = 10_000; // 10 seconds
6062
export const MAX_PROCESSOR_LOAD_TIME = 10_000; // 10 seconds
6163
export const MAX_MODEL_LOAD_TIME = 15_000; // 15 seconds
6264
export const MAX_TEST_EXECUTION_TIME = 60_000; // 60 seconds
6365
export const MAX_MODEL_DISPOSE_TIME = 1_000; // 1 second
6466

6567
export const MAX_TEST_TIME = MAX_MODEL_LOAD_TIME + MAX_TEST_EXECUTION_TIME + MAX_MODEL_DISPOSE_TIME;
6668

67-
export const DEFAULT_MODEL_OPTIONS = {
69+
export const DEFAULT_MODEL_OPTIONS = Object.freeze({
6870
dtype: "fp32",
69-
};
71+
});
7072

7173
expect.extend({
7274
toBeCloseToNested(received, expected, numDigits = 2) {

tests/models.test.js

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,9 @@
22
* Test that models loaded outside of the `pipeline` function work correctly (e.g., `AutoModel.from_pretrained(...)`);
33
*/
44

5-
import * as MODEL_TESTS from "./models/all_modeling_tests.js";
6-
75
import { AutoTokenizer, AutoModel, BertModel, GPT2Model, T5ForConditionalGeneration, BertTokenizer, GPT2Tokenizer, T5Tokenizer } from "../src/transformers.js";
8-
9-
import { init, MAX_TEST_EXECUTION_TIME } from "./init.js";
10-
11-
import { compare } from "./test_utils.js";
6+
import { init, MAX_TEST_EXECUTION_TIME, DEFAULT_MODEL_OPTIONS } from "./init.js";
7+
import { compare, collect_and_execute_tests } from "./test_utils.js";
128

139
// Initialise the testing environment
1410
init();
@@ -38,7 +34,7 @@ describe("Loading different architecture types", () => {
3834
async () => {
3935
// Load model and tokenizer
4036
const tokenizer = await tokenizerClassToTest.from_pretrained(model_id);
41-
const model = await modelClassToTest.from_pretrained(model_id, { dtype: "fp32" });
37+
const model = await modelClassToTest.from_pretrained(model_id, DEFAULT_MODEL_OPTIONS);
4238

4339
const tests = [
4440
texts[0], // single
@@ -65,7 +61,6 @@ describe("Loading different architecture types", () => {
6561
throw new Error("Unexpected output");
6662
}
6763
}
68-
6964
await model.dispose();
7065
},
7166
MAX_TEST_EXECUTION_TIME,
@@ -74,8 +69,4 @@ describe("Loading different architecture types", () => {
7469
}
7570
});
7671

77-
describe("Model-specific tests", () => {
78-
for (const [modelName, modelTest] of Object.entries(MODEL_TESTS)) {
79-
describe(modelName, modelTest);
80-
}
81-
});
72+
await collect_and_execute_tests("Model-specific tests", "modeling");

tests/models/all_modeling_tests.js

Lines changed: 0 additions & 33 deletions
This file was deleted.

tests/models/all_tokenization_tests.js

Lines changed: 0 additions & 22 deletions
This file was deleted.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import { AutoFeatureExtractor, ASTFeatureExtractor } from "../../../src/transformers.js";
2+
3+
import { load_cached_audio } from "../../asset_cache.js";
4+
import { MAX_FEATURE_EXTRACTOR_LOAD_TIME, MAX_TEST_EXECUTION_TIME } from "../../init.js";
5+
6+
// Unit tests for ASTFeatureExtractor: loads the pretrained extractor once,
// then spot-checks the spectrogram it produces for a cached audio clip.
// NOTE(review): the numeric reference values below were presumably generated
// from a trusted (e.g. Python) implementation — confirm their provenance
// before updating any of them.
export default () => {
  // ASTFeatureExtractor
  describe("ASTFeatureExtractor", () => {
    const model_id = "Xenova/ast-finetuned-audioset-10-10-0.4593";

    /** @type {ASTFeatureExtractor} */
    let feature_extractor;
    beforeAll(async () => {
      feature_extractor = await AutoFeatureExtractor.from_pretrained(model_id);
    }, MAX_FEATURE_EXTRACTOR_LOAD_TIME);

    it(
      "truncation",
      async () => {
        const audio = await load_cached_audio("mlk");
        const { input_values } = await feature_extractor(audio);
        expect(input_values.dims).toEqual([1, 1024, 128]);

        // Spot-check the mean plus individual elements. With the last
        // dimension of size 128, index 129 falls in the second row and
        // index 1025 in the ninth row of the [1024, 128] feature matrix.
        expect(input_values.mean().item()).toBeCloseTo(-0.04054912979309085);
        expect(input_values.data[0]).toBeCloseTo(-0.5662586092948914);
        expect(input_values.data[1]).toBeCloseTo(-1.0300861597061157);
        expect(input_values.data[129]).toBeCloseTo(-1.084834098815918);
        expect(input_values.data[1025]).toBeCloseTo(-1.1204065084457397);
      },
      MAX_TEST_EXECUTION_TIME,
    );

    it(
      "padding",
      async () => {
        const audio = await load_cached_audio("mlk");
        // A 1000-sample slice is far shorter than the model's fixed input size.
        const { input_values } = await feature_extractor(audio.slice(0, 1000));
        expect(input_values.dims).toEqual([1, 1024, 128]); // [1, 4, 128] -> (padded to) -> [1, 1024, 128]

        // The first rows contain real features and must match the values
        // asserted in the truncation test above for the same clip.
        expect(input_values.mean().item()).toBeCloseTo(0.4647964835166931);
        expect(input_values.data[0]).toBeCloseTo(-0.5662586092948914);
        expect(input_values.data[1]).toBeCloseTo(-1.0300861597061157);
        expect(input_values.data[129]).toBeCloseTo(-1.084834098815918);

        // padded values
        // All indices past the real frames share one constant padding value.
        expect(input_values.data[1025]).toBeCloseTo(0.46703237295150757);
        expect(input_values.data[2049]).toBeCloseTo(0.46703237295150757);
        expect(input_values.data[10000]).toBeCloseTo(0.46703237295150757);
      },
      MAX_TEST_EXECUTION_TIME,
    );
  });
};
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import { AutoFeatureExtractor, ClapFeatureExtractor } from "../../../src/transformers.js";
2+
3+
import { load_cached_audio } from "../../asset_cache.js";
4+
import { MAX_FEATURE_EXTRACTOR_LOAD_TIME, MAX_TEST_EXECUTION_TIME } from "../../init.js";
5+
6+
// Unit tests for ClapFeatureExtractor: loads the pretrained extractor once,
// then spot-checks the mel input features for truncated and padded inputs.
// NOTE(review): the numeric reference values below were presumably generated
// from a trusted (e.g. Python) implementation — confirm their provenance
// before updating any of them.
export default () => {
  // ClapFeatureExtractor
  describe("ClapFeatureExtractor", () => {
    const model_id = "Xenova/clap-htsat-unfused";

    /** @type {ClapFeatureExtractor} */
    let feature_extractor;
    beforeAll(async () => {
      feature_extractor = await AutoFeatureExtractor.from_pretrained(model_id);
    }, MAX_FEATURE_EXTRACTOR_LOAD_TIME);

    it(
      "truncation",
      async () => {
        const audio = await load_cached_audio("mlk");

        // Since truncation uses a random strategy, we override
        // Math.random to ensure that the test is deterministic
        const originalRandom = Math.random;
        Math.random = () => 0.5;
        try {
          // Build an over-long input by writing the clip at both the start and
          // the end of a 500k-sample zero-filled buffer.
          const long_audio = new Float32Array(500000);
          long_audio.set(audio);
          long_audio.set(audio, long_audio.length - audio.length);

          const { input_features } = await feature_extractor(long_audio);
          const { dims, data } = input_features;
          expect(dims).toEqual([1, 1, 1001, 64]);

          expect(input_features.mean().item()).toBeCloseTo(-37.94569396972656);
          expect(data[0]).toBeCloseTo(-53.32647705078125);
          expect(data[1]).toBeCloseTo(-47.76755142211914);
          expect(data[65]).toBeCloseTo(-36.32261276245117);
          expect(data[1002]).toBeCloseTo(-28.0314884185791);
          expect(data[10000]).toBeCloseTo(-21.905902862548828);
          expect(data[60000]).toBeCloseTo(-14.877863883972168);
          expect(data[64062]).toBeCloseTo(-37.9784049987793);
          expect(data[64063]).toBeCloseTo(-37.73963928222656);
        } finally {
          // Restore Math.random even if the extractor or an expectation throws,
          // so the stubbed RNG cannot leak into subsequent tests.
          Math.random = originalRandom;
        }
      },
      MAX_TEST_EXECUTION_TIME,
    );

    it(
      "padding",
      async () => {
        const audio = await load_cached_audio("mlk");
        const { input_features } = await feature_extractor(audio);
        const { data, dims } = input_features;
        expect(dims).toEqual([1, 1, 1001, 64]);

        expect(input_features.mean().item()).toBeCloseTo(-34.99049377441406);
        expect(data[0]).toBeCloseTo(-21.32573890686035);
        expect(data[1]).toBeCloseTo(-26.168411254882812);
        expect(data[65]).toBeCloseTo(-29.716018676757812);
        expect(data[1002]).toBeCloseTo(-32.16273498535156);
        expect(data[10000]).toBeCloseTo(-19.9283390045166);

        // padded values
        // Indices past the real frames hold the constant padding value (-100).
        expect(data[60000]).toBeCloseTo(-100.0);
        expect(data[64062]).toBeCloseTo(-100.0);
        expect(data[64063]).toBeCloseTo(-100.0);
      },
      MAX_TEST_EXECUTION_TIME,
    );
  });
};

0 commit comments

Comments (0)