Commit c1d68b6

Revert "fix(lora): add explicit tokenizer truncation to handle inputs >512 tokens"
This reverts commit b09e4ac.
1 parent: b09e4ac

File tree

2 files changed (+3, -33 lines)

candle-binding/src/core/tokenization.rs

Lines changed: 1 addition & 13 deletions

@@ -387,19 +387,7 @@ impl DualPathTokenizer for UnifiedTokenizer {
         let encoding = tokenizer
             .encode(text, self.config.add_special_tokens)
             .map_err(E::msg)?;
-
-        // Explicitly enforce max_length truncation for LoRA models
-        // This is a safety check to ensure we never exceed the model's position embedding size
-        let mut result = self.encoding_to_result(&encoding);
-        let max_len = self.config.max_length;
-        if result.token_ids.len() > max_len {
-            result.token_ids.truncate(max_len);
-            result.token_ids_u32.truncate(max_len);
-            result.attention_mask.truncate(max_len);
-            result.tokens.truncate(max_len);
-        }
-
-        Ok(result)
+        Ok(self.encoding_to_result(&encoding))
     }
 
     fn tokenize_batch_smart(
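
The safety check removed above clamps the already-encoded sequence rather than relying on tokenizer configuration. Below is a minimal standalone sketch of that pattern, assuming the `tokenizers` crate and plain `Vec<u32>` outputs; the real code operates on the project's own tokenization-result struct, and `max_len` here stands in for `self.config.max_length`.

    use tokenizers::Tokenizer;

    type TokError = Box<dyn std::error::Error + Send + Sync>;

    // Encode `text`, then clamp the ids and attention mask to `max_len` tokens
    // so the sequence can never exceed the model's position-embedding size.
    fn encode_with_cap(
        tokenizer: &Tokenizer,
        text: &str,
        max_len: usize,
    ) -> Result<(Vec<u32>, Vec<u32>), TokError> {
        let encoding = tokenizer.encode(text, true)?;

        let mut ids: Vec<u32> = encoding.get_ids().to_vec();
        let mut mask: Vec<u32> = encoding.get_attention_mask().to_vec();
        if ids.len() > max_len {
            ids.truncate(max_len);
            mask.truncate(max_len);
        }
        Ok((ids, mask))
    }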

candle-binding/src/model_architectures/lora/bert_lora.rs

Lines changed: 2 additions & 20 deletions

@@ -499,18 +499,9 @@ impl HighPerformanceBertClassifier {
 
         // Load tokenizer
         let tokenizer_path = Path::new(model_path).join("tokenizer.json");
-        let mut tokenizer = Tokenizer::from_file(&tokenizer_path)
+        let tokenizer = Tokenizer::from_file(&tokenizer_path)
             .map_err(|e| E::msg(format!("Failed to load tokenizer: {}", e)))?;
 
-        // Configure truncation to max 512 tokens (BERT's position embedding limit)
-        use tokenizers::TruncationParams;
-        tokenizer
-            .with_truncation(Some(TruncationParams {
-                max_length: 512,
-                ..Default::default()
-            }))
-            .map_err(E::msg)?;
-
         // Load model weights
         let weights_path = if Path::new(model_path).join("model.safetensors").exists() {
             Path::new(model_path).join("model.safetensors")

@@ -699,18 +690,9 @@ impl HighPerformanceBertTokenClassifier {
 
         // Load tokenizer
         let tokenizer_path = Path::new(model_path).join("tokenizer.json");
-        let mut tokenizer = Tokenizer::from_file(&tokenizer_path)
+        let tokenizer = Tokenizer::from_file(&tokenizer_path)
             .map_err(|e| E::msg(format!("Failed to load tokenizer: {}", e)))?;
 
-        // Configure truncation to max 512 tokens (BERT's position embedding limit)
-        use tokenizers::TruncationParams;
-        tokenizer
-            .with_truncation(Some(TruncationParams {
-                max_length: 512,
-                ..Default::default()
-            }))
-            .map_err(E::msg)?;
-
         // Load model weights
         let weights_path = if Path::new(model_path).join("model.safetensors").exists() {
             Path::new(model_path).join("model.safetensors")
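
The block removed from both constructors configured truncation on the tokenizer itself rather than trimming afterwards. Below is a minimal sketch of that approach, assuming a `tokenizers` crate version where `with_truncation` returns a `Result` (as in the removed lines); the `load_capped_tokenizer` name, the path argument, and the 512 cap are illustrative, not part of this repository.

    use tokenizers::{Tokenizer, TruncationParams};

    type TokError = Box<dyn std::error::Error + Send + Sync>;

    // Load a tokenizer.json from disk and cap every encoding at 512 tokens,
    // matching BERT's position-embedding limit.
    fn load_capped_tokenizer(path: &str) -> Result<Tokenizer, TokError> {
        let mut tokenizer = Tokenizer::from_file(path)?;

        tokenizer.with_truncation(Some(TruncationParams {
            max_length: 512,
            ..Default::default()
        }))?;

        Ok(tokenizer)
    }

With truncation set this way, every subsequent `encode` call clamps its output before the ids reach the model, so no per-call length check is needed.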
