Skip to content

Commit b6bd608

Browse files
authored
Update falcon tokenizer (#344)
* Update generate_tests.py
* Do not add token types for `FalconTokenizer`
1 parent 5b31129 commit b6bd608

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

src/tokenizers.js

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2705,12 +2705,7 @@ export class CodeLlamaTokenizer extends PreTrainedTokenizer { }
27052705
export class XLMRobertaTokenizer extends PreTrainedTokenizer { }
27062706
export class MPNetTokenizer extends PreTrainedTokenizer { }
27072707

2708-
export class FalconTokenizer extends PreTrainedTokenizer {
2709-
/** @type {add_token_types} */
2710-
prepare_model_inputs(inputs) {
2711-
return add_token_types(inputs);
2712-
}
2713-
}
2708+
export class FalconTokenizer extends PreTrainedTokenizer { }
27142709

27152710
export class GPTNeoXTokenizer extends PreTrainedTokenizer { }
27162711

tests/generate_tests.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
# List of tokenizers where the model isn't yet supported, but the tokenizer is
1212
ADDITIONAL_TOKENIZERS_TO_TEST = {
13-
'RefinedWebModel': [
13+
'falcon': [
1414
'tiiuae/falcon-7b',
1515
],
1616
"llama": [

0 commit comments

Comments
 (0)