
Commit 3501a77

Formatting
1 parent fa2f334 commit 3501a77

1 file changed: +56 -23 lines changed


tests/tokenizers.test.js

Lines changed: 56 additions & 23 deletions
@@ -54,7 +54,6 @@ describe("Tokenizer padding/truncation", () => {
   }, MAX_TOKENIZER_LOAD_TIME);

   describe("return_tensor=false (jagged array)", () => {
-
     test("jagged array output when return_tensor is false", () => {
       const output = tokenizer(inputs, {
         return_tensor: false,
@@ -105,7 +104,6 @@ describe("Tokenizer padding/truncation", () => {
       compare(output, expected);
     });

-
     test("No padding, max_length=3 (implicit truncation strategy)", () => {
       const output = tokenizer(inputs_2, {
         padding: false,
@@ -129,9 +127,18 @@ describe("Tokenizer padding/truncation", () => {
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0, 0, 0], [1038, 1039, 1040, 1041, 1042]],
-        token_type_ids: [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
-        attention_mask: [[1, 0, 0, 0, 0], [1, 1, 1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0, 0, 0],
+          [1038, 1039, 1040, 1041, 1042],
+        ],
+        token_type_ids: [
+          [0, 0, 0, 0, 0],
+          [0, 0, 0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0, 0, 0],
+          [1, 1, 1, 1, 1],
+        ],
       };
       compare(output, expected);
     });
@@ -161,48 +168,75 @@ describe("Tokenizer padding/truncation", () => {
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0], [1038, 1039, 1040]],
-        token_type_ids: [[0, 0, 0], [0, 0, 0]],
-        attention_mask: [[1, 0, 0], [1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0],
+          [1038, 1039, 1040],
+        ],
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0],
+          [1, 1, 1],
+        ],
       };
       compare(output, expected);
     });

     test("Padding 'max_length' without truncation, max_length=3", () => {
       const output = tokenizer(inputs_2, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: false,
         max_length: 3,
         add_special_tokens: false,
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0], [1038, 1039, 1040, 1041, 1042]],
-        token_type_ids: [[0, 0, 0], [0, 0, 0, 0, 0]],
-        attention_mask: [[1, 0, 0], [1, 1, 1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0],
+          [1038, 1039, 1040, 1041, 1042],
+        ],
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0],
+          [1, 1, 1, 1, 1],
+        ],
       };
       compare(output, expected);
     });

     test("Padding 'max_length' with truncation, max_length=3", () => {
       const output = tokenizer(inputs_2, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: true,
         max_length: 3,
         add_special_tokens: false,
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0], [1038, 1039, 1040]],
-        token_type_ids: [[0, 0, 0], [0, 0, 0]],
-        attention_mask: [[1, 0, 0], [1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0],
+          [1038, 1039, 1040],
+        ],
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0],
+          [1, 1, 1],
+        ],
       };
       compare(output, expected);
     });

     test("Padding 'max_length' without truncation and max_length=null", () => {
       const output = tokenizer(inputs_2, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: false,
         max_length: null,
         add_special_tokens: false,
@@ -211,23 +245,22 @@ describe("Tokenizer padding/truncation", () => {
       const expected = {
         input_ids: [
           [1037, ...Array(511).fill(0)],
-          [1038, 1039, 1040, 1041, 1042, ...Array(507).fill(0)]
+          [1038, 1039, 1040, 1041, 1042, ...Array(507).fill(0)],
         ],
         token_type_ids: [
           [0, ...Array(511).fill(0)],
-          [0, 0, 0, 0, 0, ...Array(507).fill(0)]
+          [0, 0, 0, 0, 0, ...Array(507).fill(0)],
         ],
         attention_mask: [
           [1, ...Array(511).fill(0)],
-          [1, 1, 1, 1, 1, ...Array(507).fill(0)]
+          [1, 1, 1, 1, 1, ...Array(507).fill(0)],
         ],
       };
       compare(output, expected);
     });
   });

   describe("return_tensor=true", () => {
-
     test("throws error when tensor output is requested for a jagged array", () => {
       expect(() => tokenizer(inputs)).toThrow("Unable to create tensor");
     });
@@ -329,7 +362,7 @@ describe("Tokenizer padding/truncation", () => {

     test("padding:'max_length' pads to the specified max_length", () => {
       const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: true,
         add_special_tokens: false,
         max_length: 3,
@@ -347,7 +380,7 @@ describe("Tokenizer padding/truncation", () => {
         [0n, 0n, 0n],
       ]);
     });
-  })
+  });
 });

 describe("Token type ids", () => {
