Skip to content

Commit 79a83d1

Browse files
committed
Remove metaspace add_prefix_space logic (pre_tokenizer + decoder)
1 parent b33281a commit 79a83d1

File tree

1 file changed

+3
-9
lines changed

1 file changed

+3
-9
lines changed

src/tokenizers.js

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2279,15 +2279,13 @@ class VitsDecoder extends Decoder {
22792279
class MetaspacePreTokenizer extends PreTokenizer {
22802280
/**
22812281
* @param {Object} config The configuration object for the MetaspacePreTokenizer.
2282-
* @param {boolean} config.add_prefix_space Whether to add a prefix space to the first token.
22832282
* @param {string} config.replacement The character to replace spaces with.
22842283
* @param {string} [config.str_rep=config.replacement] An optional string representation of the replacement character.
22852284
* @param {'first'|'never'|'always'} [config.prepend_scheme='always'] The metaspace prepending scheme.
22862285
*/
22872286
constructor(config) {
22882287
super();
22892288

2290-
this.addPrefixSpace = config.add_prefix_space ?? true;
22912289
this.replacement = config.replacement;
22922290
this.strRep = config.str_rep || this.replacement;
22932291
this.prepend_scheme = config.prepend_scheme ?? 'always';
@@ -2309,9 +2307,8 @@ class MetaspacePreTokenizer extends PreTokenizer {
23092307

23102308
if (
23112309
// We add a prefix space if:
2312-
// (1) The addPrefixSpace option is enabled and the normalized
2313-
// token does not already start with the replacement character.
2314-
(this.addPrefixSpace && !normalized.startsWith(this.replacement))
2310+
// (1) The normalized token does not already start with the replacement character.
2311+
!normalized.startsWith(this.replacement)
23152312

23162313
// and (2) either:
23172314
// (a) prepend_scheme is 'always'
@@ -2335,13 +2332,11 @@ class MetaspaceDecoder extends Decoder {
23352332
/**
23362333
* Constructs a new MetaspaceDecoder object.
23372334
* @param {Object} config The configuration object for the MetaspaceDecoder.
2338-
* @param {boolean} config.add_prefix_space Whether to add a prefix space to the decoded string.
23392335
* @param {string} config.replacement The string to replace spaces with.
23402336
*/
23412337
constructor(config) {
23422338
super(config);
23432339

2344-
this.addPrefixSpace = config.add_prefix_space;
23452340
this.replacement = config.replacement;
23462341
}
23472342

@@ -2350,7 +2345,7 @@ class MetaspaceDecoder extends Decoder {
23502345
const result = [];
23512346
for (let i = 0; i < tokens.length; ++i) {
23522347
let normalized = tokens[i].replaceAll(this.replacement, ' ');
2353-
if (this.addPrefixSpace && i == 0 && normalized.startsWith(' ')) {
2348+
if (i == 0 && normalized.startsWith(' ')) {
23542349
normalized = normalized.substring(1);
23552350
}
23562351
result.push(normalized);
@@ -3425,7 +3420,6 @@ export class LlamaTokenizer extends PreTrainedTokenizer {
34253420
this.normalizer = null;
34263421
this.pre_tokenizer = new MetaspacePreTokenizer({
34273422
replacement: SPIECE_UNDERLINE,
3428-
add_prefix_space: true,
34293423
prepend_scheme: "first",
34303424
});
34313425
}

0 commit comments

Comments
 (0)