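fixes: adjust effort-profile thought_switch_tokens and max_thoughts; on EOS without an end-think token, append the end-think token and continue generating instead of stopping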

codelion · codelion · commit 31597684fefa · 2025-02-23T22:04:13.000+08:00
diff --git a/optillm/inference.py b/optillm/inference.py
@@ -1608,32 +1608,31 @@ def get_effort_profile(reasoning_effort: str, max_tokens: int = 4096) -> dict:
             "max_tokens_pct": 0.33,  # 33% of max_tokens
             "max_thoughts": 4,
             "thought_switch_tokens": [
-                "However,",
-                "Additionally,"
+                "Wait,",
+                "Alternatively,"
             ],
             "prefill": "Let me think about this briefly..."
         },
         "medium": {
             "min_tokens_pct": 0.33,  # 33% of max_tokens
             "max_tokens_pct": 0.66,  # 66% of max_tokens
-            "max_thoughts": 16,
+            "max_thoughts": 8,
             "thought_switch_tokens": [
-                "Additionally,",
+                "Wait,",
                 "Alternatively,",
                 "However,",
-                "Wait,"
             ],
             "prefill": "Let me analyze this from multiple angles..."
         },
         "high": {
             "min_tokens_pct": 0.66,  # 66% of max_tokens
             "max_tokens_pct": 0.90,  # 90% of max_tokens
-            "max_thoughts": 32,
+            "max_thoughts": 16,
             "thought_switch_tokens": [
-                "Additionally,",
+                "Wait,",
                 "Alternatively,",
                 "However,",
-                "Wait,"
+                "Additionally,",
             ],
             "prefill": "This requires careful analysis. Let me think through it systematically..."
         }
diff --git a/optillm/thinkdeeper.py b/optillm/thinkdeeper.py
@@ -135,6 +135,7 @@ def reasoning_effort(self, messages) -> str:
 
             # Handle EOS token
             if next_token == self.model.config.eos_token_id:
+                logger.debug("Found eos token")
                 if seen_end_think:
                     logger.debug("Reached EOS after end think token - stopping generation")
                     response_chunks.append(next_str)
@@ -150,11 +151,12 @@ def reasoning_effort(self, messages) -> str:
                     self.thought_count += 1
                     continue
                 else:
-                    # Force end think token if we haven't seen it
-                    logger.debug("Reached EOS without end think token - adding end token")
+                    # Force end think token and continue generating for natural conclusion
+                    logger.debug("Reached EOS without end think token - adding end token and continuing generation")
                     response_chunks.append(self.tokenizer.decode([self.end_think_token]))
-                    response_chunks.append(next_str)
-                    break
+                    tokens = torch.tensor([[self.end_think_token]]).to(tokens.device)
+                    seen_end_think = True
+                    continue
             
             # Normal token processing
             response_chunks.append(next_str)