
Commit 3c75dcc

Merge branch 'main' into main
2 parents: 0b4e5f6 + 7c01a4f

33 files changed: +379 additions, −216 deletions

README.md

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ You can install from source using:
 pip install git+https://github.com/huggingface/tokenizers.git#subdirectory=bindings/python
 ```
 
-our install the released versions with
+or install the released versions with
 
 ```bash
 pip install tokenizers

bindings/node/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 authors = ["Nicolas Patry <[email protected]>"]
 edition = "2021"
 name = "node"
-version = "0.21.2-dev.0"
+version = "0.21.4-dev.0"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

bindings/node/src/decoders.rs

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ impl Decoder {
       .read()
       .unwrap()
       .decode(tokens)
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 }

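The substantive change in this file, and the recurring change across the node binding files below, is mechanical: positional format arguments (`format!("{}", e)`) are replaced with inline format arguments (`format!("{e}")`), available since Rust 1.58. A minimal sketch of the equivalence, using a made-up error type rather than the bindings' actual napi error:

```rust
use std::fmt;

// Stand-in error type for illustration only; the bindings format a real
// tokenizers error before wrapping it in napi's `Error::from_reason`.
#[derive(Debug)]
struct DummyError(&'static str);

impl fmt::Display for DummyError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

fn main() {
    let e = DummyError("unknown token");
    // The positional form (before the change) and the inline capture
    // (after the change) produce identical strings; `{e}` simply names
    // the in-scope variable directly inside the format string.
    assert_eq!(format!("{}", e), format!("{e}"));
}
```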
bindings/node/src/normalizers.rs

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ impl Normalizer {
 
     self
       .normalize(&mut normalized)
-      .map_err(|e| Error::from_reason(format!("{}", e)))?;
+      .map_err(|e| Error::from_reason(format!("{e}")))?;
 
     Ok(normalized.get().to_string())
   }

bindings/node/src/pre_tokenizers.rs

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ impl PreTokenizer {
 
     self
       .pre_tokenize(&mut pretokenized)
-      .map_err(|e| Error::from_reason(format!("{}", e)))?;
+      .map_err(|e| Error::from_reason(format!("{e}")))?;
 
     pretokenized
       .get_splits(tk::OffsetReferential::Original, tk::OffsetType::Char)

bindings/node/src/tasks/models.rs

Lines changed: 3 additions & 3 deletions
@@ -21,7 +21,7 @@ impl Task for BPEFromFilesTask {
       .take()
       .ok_or(Error::from_reason("Empty builder".to_string()))?
       .build()
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
@@ -45,7 +45,7 @@ impl Task for WordPieceFromFilesTask {
       .take()
       .ok_or(Error::from_reason("Empty builder".to_string()))?
       .build()
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
@@ -68,7 +68,7 @@ impl Task for WordLevelFromFilesTask {
       .take()
       .ok_or(Error::from_reason("Empty builder".to_string()))?
       .build()
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {

bindings/node/src/tasks/tokenizer.rs

Lines changed: 4 additions & 4 deletions
@@ -28,7 +28,7 @@ impl Task for EncodeTask<'static> {
         .ok_or(Error::from_reason("No provided input"))?,
       self.add_special_tokens,
     )
-    .map_err(|e| Error::from_reason(format!("{}", e)))
+    .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
@@ -55,7 +55,7 @@ impl Task for DecodeTask {
       .read()
       .unwrap()
       .decode(&self.ids, self.skip_special_tokens)
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
@@ -85,7 +85,7 @@ impl Task for EncodeBatchTask<'static> {
         .ok_or(Error::from_reason("No provided input"))?,
       self.add_special_tokens,
     )
-    .map_err(|e| Error::from_reason(format!("{}", e)))
+    .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
@@ -118,7 +118,7 @@ impl Task for DecodeBatchTask {
       .read()
      .unwrap()
       .decode_batch(&ids, self.skip_special_tokens)
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {

bindings/node/src/tokenizer.rs

Lines changed: 5 additions & 7 deletions
@@ -251,7 +251,7 @@ impl Tokenizer {
       .read()
       .unwrap()
       .save(path, pretty)
-      .map_err(|e| Error::from_reason(format!("{}", e)))
+      .map_err(|e| Error::from_reason(format!("{e}")))
   }
 
   #[napi]
@@ -341,9 +341,7 @@ impl Tokenizer {
       PreTokenizer,
       Processor,
       Decoder,
-    > = s
-      .parse()
-      .map_err(|e| Error::from_reason(format!("{}", e)))?;
+    > = s.parse().map_err(|e| Error::from_reason(format!("{e}")))?;
     Ok(Self {
       tokenizer: Arc::new(RwLock::new(tokenizer)),
     })
@@ -352,7 +350,7 @@ impl Tokenizer {
   #[napi(factory)]
   pub fn from_file(file: String) -> Result<Self> {
     let tokenizer = tk::tokenizer::TokenizerImpl::from_file(file)
-      .map_err(|e| Error::from_reason(format!("Error loading from file{}", e)))?;
+      .map_err(|e| Error::from_reason(format!("Error loading from file{e}")))?;
     Ok(Self {
       tokenizer: Arc::new(RwLock::new(tokenizer)),
     })
@@ -472,7 +470,7 @@ impl Tokenizer {
       .write()
       .unwrap()
       .train_from_files(&mut trainer, files)
-      .map_err(|e| Error::from_reason(format!("{}", e)))?;
+      .map_err(|e| Error::from_reason(format!("{e}")))?;
     Ok(())
   }
 
@@ -504,7 +502,7 @@ impl Tokenizer {
         },
         add_special_tokens,
       )
-      .map_err(|e| Error::from_reason(format!("{}", e)))?
+      .map_err(|e| Error::from_reason(format!("{e}")))?
       .into(),
     )
   }

bindings/python/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [package]
 name = "tokenizers-python"
-version = "0.21.2-dev.0"
+version = "0.21.4-dev.0"
 authors = ["Anthony MOI <[email protected]>"]
 edition = "2021"

bindings/python/py_src/tokenizers/decoders/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ class DecodeStream:
     Class needed for streaming decode
 
     """
-    def __init__(self, skip_special_tokens):
+    def __init__(self, ids=None, skip_special_tokens=False):
        pass
 
 class Decoder:

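For context on what the stub change documents: `DecodeStream` decodes token ids incrementally, emitting text chunks as enough ids arrive, and the constructor now accepts an optional initial `ids` list alongside an explicit `skip_special_tokens=False` default. Below is a hedged sketch of the streaming flow using the Rust crate these bindings wrap; `decode_stream` and `step` follow the crate's documented API, while the file name and token ids are made up for illustration:

```rust
use tokenizers::Tokenizer;

fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Assumes a serialized tokenizer on disk; the path is illustrative.
    let tokenizer = Tokenizer::from_file("tokenizer.json")?;

    // `false` matches the stub's new `skip_special_tokens=False` default.
    let mut stream = tokenizer.decode_stream(false);

    for id in [713u32, 16, 41, 456] { // made-up token ids
        // `step` returns Some(text) once the buffered ids form a valid
        // printable chunk, and None while it is still buffering.
        if let Some(chunk) = stream.step(id)? {
            print!("{chunk}");
        }
    }
    println!();
    Ok(())
}
```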