2 files changed: +3 −33 lines

@@ -387,19 +387,7 @@ impl DualPathTokenizer for UnifiedTokenizer {
         let encoding = tokenizer
             .encode(text, self.config.add_special_tokens)
             .map_err(E::msg)?;
-
-        // Explicitly enforce max_length truncation for LoRA models
-        // This is a safety check to ensure we never exceed the model's position embedding size
-        let mut result = self.encoding_to_result(&encoding);
-        let max_len = self.config.max_length;
-        if result.token_ids.len() > max_len {
-            result.token_ids.truncate(max_len);
-            result.token_ids_u32.truncate(max_len);
-            result.attention_mask.truncate(max_len);
-            result.tokens.truncate(max_len);
-        }
-
-        Ok(result)
+        Ok(self.encoding_to_result(&encoding))
     }

     fn tokenize_batch_smart(
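Review note: this hunk drops the manual per-field truncate() safety net and returns encoding_to_result() directly, so the length cap must now come from the tokenizer itself. A minimal sketch of that assumption, using the same tokenizers API this PR removes from the second file; the helper name and anyhow error handling are illustrative, not from this PR:

use anyhow::Error as E;
use tokenizers::{Tokenizer, TruncationParams};

// Hypothetical helper (not part of this PR): configure truncation once on the
// tokenizer so encode() output never exceeds max_length, which is what makes
// the removed per-call truncate() loop unnecessary.
fn load_tokenizer_with_truncation(path: &str, max_length: usize) -> anyhow::Result<Tokenizer> {
    let mut tokenizer = Tokenizer::from_file(path).map_err(E::msg)?;
    tokenizer
        .with_truncation(Some(TruncationParams {
            max_length,
            ..Default::default()
        }))
        .map_err(E::msg)?;
    Ok(tokenizer)
}

One advantage of capping length at the tokenizer level is that token_ids, attention_mask, and tokens stay in sync automatically instead of being trimmed field by field.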
@@ -499,18 +499,9 @@ impl HighPerformanceBertClassifier {
 
         // Load tokenizer
         let tokenizer_path = Path::new(model_path).join("tokenizer.json");
-        let mut tokenizer = Tokenizer::from_file(&tokenizer_path)
+        let tokenizer = Tokenizer::from_file(&tokenizer_path)
             .map_err(|e| E::msg(format!("Failed to load tokenizer: {}", e)))?;
 
-        // Configure truncation to max 512 tokens (BERT's position embedding limit)
-        use tokenizers::TruncationParams;
-        tokenizer
-            .with_truncation(Some(TruncationParams {
-                max_length: 512,
-                ..Default::default()
-            }))
-            .map_err(E::msg)?;
-
         // Load model weights
         let weights_path = if Path::new(model_path).join("model.safetensors").exists() {
            Path::new(model_path).join("model.safetensors")
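Review note: with the explicit with_truncation() call gone, HighPerformanceBertClassifier relies on whatever truncation config tokenizer.json itself carries. A hedged sanity check one could run; get_truncation() is a real tokenizers accessor, but the path and messages here are made up:

use anyhow::Error as E;
use tokenizers::Tokenizer;

fn main() -> anyhow::Result<()> {
    // Illustrative path; substitute the real model directory.
    let tokenizer = Tokenizer::from_file("models/bert/tokenizer.json").map_err(E::msg)?;
    match tokenizer.get_truncation() {
        Some(params) => println!("truncation enabled at {} tokens", params.max_length),
        None => eprintln!("warning: no truncation configured; long inputs may overflow BERT's 512 position embeddings"),
    }
    Ok(())
}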
@@ -699,18 +690,9 @@ impl HighPerformanceBertTokenClassifier {
 
         // Load tokenizer
         let tokenizer_path = Path::new(model_path).join("tokenizer.json");
-        let mut tokenizer = Tokenizer::from_file(&tokenizer_path)
+        let tokenizer = Tokenizer::from_file(&tokenizer_path)
             .map_err(|e| E::msg(format!("Failed to load tokenizer: {}", e)))?;
 
-        // Configure truncation to max 512 tokens (BERT's position embedding limit)
-        use tokenizers::TruncationParams;
-        tokenizer
-            .with_truncation(Some(TruncationParams {
-                max_length: 512,
-                ..Default::default()
-            }))
-            .map_err(E::msg)?;
-
         // Load model weights
         let weights_path = if Path::new(model_path).join("model.safetensors").exists() {
            Path::new(model_path).join("model.safetensors")
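Review note: same change as in HighPerformanceBertClassifier above. For both classifiers the working assumption is that encode() output arrives already capped. A minimal sketch of what that guarantee looks like when truncation is set on the tokenizer; the input string and file path are illustrative:

use anyhow::Error as E;
use tokenizers::{Tokenizer, TruncationParams};

fn main() -> anyhow::Result<()> {
    let mut tokenizer = Tokenizer::from_file("models/bert/tokenizer.json").map_err(E::msg)?;
    tokenizer
        .with_truncation(Some(TruncationParams { max_length: 512, ..Default::default() }))
        .map_err(E::msg)?;
    // With truncation configured, encode() caps the sequence itself; no manual
    // token_ids.truncate(...) pass is needed afterwards.
    let encoding = tokenizer.encode("a ".repeat(2000), true).map_err(E::msg)?;
    assert!(encoding.get_ids().len() <= 512);
    Ok(())
}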