
Commit 4b85f01

removed TIP, added max_thoughts cap
1 parent f89f89e commit 4b85f01

2 files changed (+35 −55 lines)

optillm/inference.py

Lines changed: 6 additions & 7 deletions
@@ -1339,8 +1339,8 @@ def create(
     min_p: float = 0.03,
     thought_switch_tokens: List[str] = ["Wait,", "Alternatively,"],
     min_thinking_tokens: int = 512,
-    tip_alpha: float = 4.0,
-    tip_beta: int = 1024,
+    max_thinking_tokens: int = 2048,
+    max_thoughts: int = 4,
     num_traces: int = 1,
     prefill: str = "",
     start_think_token: str ="<think>",
@@ -1446,12 +1446,11 @@ def create(
     thinkdeeper_config = {
         "thought_switch_tokens": thought_switch_tokens,
         "min_thinking_tokens": min_thinking_tokens,
+        "max_thinking_tokens": max_thinking_tokens,
+        "max_thoughts": max_thoughts,
         "prefill": prefill,
-        "start_think_token" : start_think_token,
-        "end_think_token" : end_think_token,
-        "num_traces" : num_traces,
-        "tip_alpha" : tip_alpha,
-        "tip_beta" : tip_beta,
+        "start_think_token": start_think_token,
+        "end_think_token": end_think_token,
     }
     result = thinkdeeper_decode(
         pipeline.current_model,
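In short, the two TIP knobs (a soft logit penalty of strength tip_alpha applied for tip_beta tokens after each thought switch) are replaced by two hard budgets. A minimal standalone sketch of the new stopping condition, with names taken from the diff (the helper function itself is illustrative, not part of the commit):

    def should_force_end_think(n_thinking_tokens: int, thought_count: int,
                               max_thinking_tokens: int = 2048,
                               max_thoughts: int = 4) -> bool:
        # Mirrors the check in ThinkDeeperProcessor.reasoning_effort below:
        # stop thinking once either budget is exhausted.
        return (n_thinking_tokens >= max_thinking_tokens
                or thought_count >= max_thoughts)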

optillm/thinkdeeper.py

Lines changed: 29 additions & 48 deletions
@@ -6,23 +6,20 @@

 logger = logging.getLogger(__name__)

-# Default configurations
 DEFAULT_CONFIG = {
     "min_thinking_tokens": 512,
+    "max_thinking_tokens": 2048,  # New parameter to cap thinking length
+    "max_thoughts": 4,  # New parameter to limit number of thought transitions
     "prefill": "",
     "start_think_token": "<think>",
     "end_think_token": "</think>",
-
-    # Combined thought transition markers and TIP configs
-    "tip_alpha": 4.0,  # Penalty strength
-    "tip_beta": 1024,  # Penalty duration (number of tokens)
     "thought_switch_tokens": [
         "Wait,",
         "Alternatively,",
     ],
 }

-class ThinkDeeperTIPProcessor:
+class ThinkDeeperProcessor:
     def __init__(self, config: Dict[str, Any], tokenizer, model):
         self.config = {**DEFAULT_CONFIG, **config}
         self.tokenizer = tokenizer
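Because the processor merges per-request config over the defaults with {**DEFAULT_CONFIG, **config}, a request can override one cap while keeping the other at its default. A quick illustrative check (DEFAULT_CONFIG as defined in the hunk above):

    from optillm.thinkdeeper import DEFAULT_CONFIG

    config = {**DEFAULT_CONFIG, **{"max_thoughts": 8}}
    assert config["max_thoughts"] == 8             # per-request override wins
    assert config["max_thinking_tokens"] == 2048   # unset keys keep their defaults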
@@ -40,27 +37,12 @@ def __init__(self, config: Dict[str, Any], tokenizer, model):
             token_ids = self.tokenizer.encode(phrase, add_special_tokens=False)
             self.thought_switch_tokens.update(token_ids)

-        # Track when the last thought switch occurred
-        self.last_thought_switch_pos = 0
-
-    def adjust_logits_with_tip(self, logits: torch.Tensor, current_pos: int) -> torch.Tensor:
-        """Apply Thought Switching Penalty (TIP) to logits"""
-        tokens_since_last_switch = current_pos - self.last_thought_switch_pos
-
-        if tokens_since_last_switch < self.config["tip_beta"]:
-            penalty_mask = torch.zeros_like(logits)
-            for token_id in self.thought_switch_tokens:
-                if token_id < logits.size(-1):  # Ensure token_id is within valid range
-                    penalty_mask[token_id] = self.config["tip_alpha"]
-
-            adjusted_logits = logits - penalty_mask
-            return adjusted_logits
+        # Track thought switches
+        self.thought_count = 0

-        return logits
-
     @torch.inference_mode()
     def reasoning_effort(self, messages) -> str:
-        """Generate response with ThinkDeeper + TIP"""
+        """Generate response with ThinkDeeper's controlled thinking process"""

         messages.append({"role": "assistant", "content": f"{self.config['start_think_token']}\n{self.config['prefill']}"})

@@ -75,27 +57,28 @@ def reasoning_effort(self, messages) -> str:
         n_thinking_tokens = 0
         seen_end_think = False
         response_chunks = []
-        current_pos = 0

         while True:
             out = self.model(input_ids=tokens, past_key_values=kv, use_cache=True)
-
-            # Apply TIP to logits
             logits = out.logits[0, -1, :]
-            adjusted_logits = self.adjust_logits_with_tip(logits, current_pos)

-            next_token = torch.multinomial(
-                torch.softmax(adjusted_logits, dim=-1), 1
-            ).item()
-            kv = out.past_key_values
+            # Force end think token if we exceed limits
+            if (n_thinking_tokens >= self.config["max_thinking_tokens"] or
+                self.thought_count >= self.config["max_thoughts"]):
+                next_token = self.end_think_token
+                logger.debug(f"Forcing end think token. Tokens: {n_thinking_tokens}, Thoughts: {self.thought_count}")
+            else:
+                next_token = torch.multinomial(
+                    torch.softmax(logits, dim=-1), 1
+                ).item()

+            kv = out.past_key_values
             next_str = self.tokenizer.decode([next_token])
-            logger.debug(f"Generated token {next_token} -> '{next_str}'")
-
+
             # Check if this is a thought-switching token
             if next_token in self.thought_switch_tokens:
-                self.last_thought_switch_pos = current_pos
-                logger.debug(f"Detected thought switch at position {current_pos}")
+                self.thought_count += 1
+                logger.debug(f"Detected thought switch. Total thoughts: {self.thought_count}")

             # Track if we've seen the end think token
             if next_token == self.end_think_token:
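The control-flow change is easiest to see in isolation: sampling proceeds normally until either budget runs out, at which point the end-think token is forced rather than sampled. A condensed sketch of that step (the function boundary is illustrative, not part of the commit; it assumes logits, config, and the end-think token id are set up as in the hunk above):

    import torch

    def next_thinking_token(logits: torch.Tensor, n_thinking_tokens: int,
                            thought_count: int, config: dict,
                            end_think_token: int) -> int:
        # Budget exhausted: force the model out of the think block.
        if (n_thinking_tokens >= config["max_thinking_tokens"]
                or thought_count >= config["max_thoughts"]):
            return end_think_token
        # Otherwise sample from the unmodified distribution (no TIP penalty).
        return torch.multinomial(torch.softmax(logits, dim=-1), 1).item()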
@@ -109,14 +92,15 @@ def reasoning_effort(self, messages) -> str:
                 and n_thinking_tokens < self.config["min_thinking_tokens"])
                 or (next_token == self.model.config.eos_token_id and not seen_end_think)):

+                # Insert thought transition
                 replacement = random.choice(self.config["thought_switch_tokens"])
-                logger.debug(f"Inserting thought transition: '{replacement}' (tokens: {n_thinking_tokens}, seen_end_think: {seen_end_think})")
+                logger.debug(f"Inserting thought transition: '{replacement}' (tokens: {n_thinking_tokens})")
                 response_chunks.append(replacement)
                 replacement_tokens = self.tokenizer.encode(replacement)
                 n_thinking_tokens += len(replacement_tokens)
                 tokens = torch.tensor([replacement_tokens]).to(tokens.device)
+                self.thought_count += 1
                 seen_end_think = False
-                logger.debug("Reset seen_end_think flag after replacement")

             elif next_token == self.model.config.eos_token_id and seen_end_think:
                 logger.debug("Reached EOS after end think token - stopping generation")
@@ -126,14 +110,12 @@ def reasoning_effort(self, messages) -> str:
                 response_chunks.append(next_str)
                 n_thinking_tokens += 1
                 tokens = torch.tensor([[next_token]]).to(tokens.device)
-                current_pos += 1
-                logger.debug(f"Added token to response. Total thinking tokens: {n_thinking_tokens}")

         # Join all chunks and trim off the initial prompt
         response = "".join(response_chunks)
         full_response = f"{self.config['start_think_token']}\n{self.config['prefill']}{response}"

-        logger.debug(f"Final response length: {len(full_response)} chars")
+        logger.debug(f"Final response length: {len(full_response)} chars, Total thoughts: {self.thought_count}")
         return full_response

 def thinkdeeper_decode(
@@ -142,25 +124,24 @@ def thinkdeeper_decode(
     messages: List[Dict[str, str]],
     request_config: Dict[str, Any] = None
 ) -> str:
-    """Main plugin execution function with ThinkDeeper + TIP"""
-    logger.info("Starting ThinkDeeper+TIP processing")
+    """Main plugin execution function with ThinkDeeper's controlled thinking process"""
+    logger.info("Starting ThinkDeeper processing")

     # Extract config from request_config if provided
     config = DEFAULT_CONFIG.copy()
     if request_config:
-        thinkdeeper_config = request_config
         # Update only valid keys
         for key in DEFAULT_CONFIG:
-            if key in thinkdeeper_config:
-                config[key] = thinkdeeper_config[key]
+            if key in request_config:
+                config[key] = request_config[key]

     logger.info(f"Using config: {config}")

     try:
-        processor = ThinkDeeperTIPProcessor(config, tokenizer, model)
+        processor = ThinkDeeperProcessor(config, tokenizer, model)
         response = processor.reasoning_effort(messages)
         return response

     except Exception as e:
-        logger.error(f"Error in ThinkDeeper+TIP processing: {str(e)}")
+        logger.error(f"Error in ThinkDeeper processing: {str(e)}")
         raise
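For reference, a minimal call sketch of the renamed entry point. The argument order is assumed from the call site in inference.py (model first, then tokenizer) and the config keys follow DEFAULT_CONFIG; the model id and message are placeholders:

    from transformers import AutoModelForCausalLM, AutoTokenizer
    from optillm.thinkdeeper import thinkdeeper_decode

    model = AutoModelForCausalLM.from_pretrained("my-org/my-reasoning-model")  # hypothetical id
    tokenizer = AutoTokenizer.from_pretrained("my-org/my-reasoning-model")

    response = thinkdeeper_decode(
        model,
        tokenizer,
        [{"role": "user", "content": "Is 97 prime?"}],
        request_config={"max_thinking_tokens": 1024, "max_thoughts": 2},
    )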
