@@ -144,15 +144,15 @@ pub fn tiktoken_decode(c: &mut Criterion) {
144144//
145145// By default these download the tokenizer for DEFAULT_HF_MODEL (Qwen/Qwen3-0.6B)
146146// from the HuggingFace Hub. Override with a different tokenizer via TOKENIZER_PATH:
147- // TOKENIZER_PATH=/path/to/tokenizer.json cargo bench -- fasttokens
148- // TOKENIZER_PATH=Qwen/Qwen3-0.6B cargo bench -- fasttokens
147+ // TOKENIZER_PATH=/path/to/tokenizer.json cargo bench -- fastokens
148+ // TOKENIZER_PATH=Qwen/Qwen3-0.6B cargo bench -- fastokens
149149// ---------------------------------------------------------------------------
150150
151151/// Default HuggingFace model to download when TOKENIZER_PATH is not set.
152152const DEFAULT_HF_MODEL : & str = "Qwen/Qwen3-0.6B" ;
153153
154154/// Resolve a tokenizer.json path from TOKENIZER_PATH env var or download from HF Hub.
155- fn resolve_fasttokens_path ( ) -> String {
155+ fn resolve_tokenizer_path ( ) -> String {
156156 let input = std:: env:: var ( "TOKENIZER_PATH" ) . ok ( ) ;
157157
158158 if let Some ( ref p) = input
@@ -175,10 +175,10 @@ fn resolve_fasttokens_path() -> String {
175175 . to_string ( )
176176}
177177
178- const FASTTOKENS_BATCH_SIZE : usize = 64 ;
178+ const FASTOKENS_BATCH_SIZE : usize = 64 ;
179179
180- pub fn fasttokens_encode ( c : & mut Criterion ) {
181- let tokenizer_path = resolve_fasttokens_path ( ) ;
180+ pub fn fastokens_encode ( c : & mut Criterion ) {
181+ let tokenizer_path = resolve_tokenizer_path ( ) ;
182182 let test_str: & str = & INPUT_STR . repeat ( TARGET_ISL / INPUT_STR . len ( ) ) ;
183183
184184 let hf_encoder = HuggingFaceTokenizer :: from_file ( & tokenizer_path) . unwrap ( ) ;
@@ -190,10 +190,10 @@ pub fn fasttokens_encode(c: &mut Criterion) {
190190 assert_eq ! (
191191 hf_ids. token_ids( ) ,
192192 fast_ids. token_ids( ) ,
193- "fasttokens and HuggingFace must produce identical token IDs"
193+ "fastokens and HuggingFace must produce identical token IDs"
194194 ) ;
195195
196- let mut group = c. benchmark_group ( "fasttokens -encode" ) ;
196+ let mut group = c. benchmark_group ( "fastokens -encode" ) ;
197197 group. throughput ( Throughput :: Bytes ( test_str. len ( ) as u64 ) ) ;
198198
199199 group. bench_function ( "hf_encode" , |b| {
@@ -202,7 +202,7 @@ pub fn fasttokens_encode(c: &mut Criterion) {
202202 } )
203203 } ) ;
204204
205- group. bench_function ( "fasttokens_encode " , |b| {
205+ group. bench_function ( "fastokens_encode " , |b| {
206206 b. iter ( || {
207207 let _ = fast_encoder. encode ( black_box ( test_str) ) . unwrap ( ) ;
208208 } )
@@ -211,15 +211,15 @@ pub fn fasttokens_encode(c: &mut Criterion) {
211211 group. finish ( ) ;
212212}
213213
214- pub fn fasttokens_batch_encode ( c : & mut Criterion ) {
215- let tokenizer_path = resolve_fasttokens_path ( ) ;
216- let batch: Vec < & str > = ( 0 ..FASTTOKENS_BATCH_SIZE ) . map ( |_| INPUT_STR ) . collect ( ) ;
214+ pub fn fastokens_batch_encode ( c : & mut Criterion ) {
215+ let tokenizer_path = resolve_tokenizer_path ( ) ;
216+ let batch: Vec < & str > = ( 0 ..FASTOKENS_BATCH_SIZE ) . map ( |_| INPUT_STR ) . collect ( ) ;
217217 let total_bytes: u64 = batch. iter ( ) . map ( |s| s. len ( ) as u64 ) . sum ( ) ;
218218
219219 let hf_encoder = HuggingFaceTokenizer :: from_file ( & tokenizer_path) . unwrap ( ) ;
220220 let fast_encoder = FastTokenizer :: from_file ( & tokenizer_path) . unwrap ( ) ;
221221
222- let mut group = c. benchmark_group ( "fasttokens -batch-encode" ) ;
222+ let mut group = c. benchmark_group ( "fastokens -batch-encode" ) ;
223223 group. throughput ( Throughput :: Bytes ( total_bytes) ) ;
224224
225225 group. bench_function ( "hf_batch_encode" , |b| {
@@ -228,7 +228,7 @@ pub fn fasttokens_batch_encode(c: &mut Criterion) {
228228 } )
229229 } ) ;
230230
231- group. bench_function ( "fasttokens_batch_encode " , |b| {
231+ group. bench_function ( "fastokens_batch_encode " , |b| {
232232 b. iter ( || {
233233 let _ = fast_encoder. encode_batch ( black_box ( & batch) ) . unwrap ( ) ;
234234 } )
@@ -244,7 +244,7 @@ criterion_group!(
244244 decode_big,
245245 tiktoken_encode,
246246 tiktoken_decode,
247- fasttokens_encode ,
248- fasttokens_batch_encode
247+ fastokens_encode ,
248+ fastokens_batch_encode
249249) ;
250250criterion_main ! ( benches) ;
0 commit comments