Skip to content

Commit 3159768

Browse files
committed
fixes
1 parent 4f9bdc6 commit 3159768

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

optillm/inference.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,32 +1608,31 @@ def get_effort_profile(reasoning_effort: str, max_tokens: int = 4096) -> dict:
16081608
"max_tokens_pct": 0.33, # 33% of max_tokens
16091609
"max_thoughts": 4,
16101610
"thought_switch_tokens": [
1611-
"However,",
1612-
"Additionally,"
1611+
"Wait,",
1612+
"Alternatively,"
16131613
],
16141614
"prefill": "Let me think about this briefly..."
16151615
},
16161616
"medium": {
16171617
"min_tokens_pct": 0.33, # 33% of max_tokens
16181618
"max_tokens_pct": 0.66, # 66% of max_tokens
1619-
"max_thoughts": 16,
1619+
"max_thoughts": 8,
16201620
"thought_switch_tokens": [
1621-
"Additionally,",
1621+
"Wait,",
16221622
"Alternatively,",
16231623
"However,",
1624-
"Wait,"
16251624
],
16261625
"prefill": "Let me analyze this from multiple angles..."
16271626
},
16281627
"high": {
16291628
"min_tokens_pct": 0.66, # 66% of max_tokens
16301629
"max_tokens_pct": 0.90, # 90% of max_tokens
1631-
"max_thoughts": 32,
1630+
"max_thoughts": 16,
16321631
"thought_switch_tokens": [
1633-
"Additionally,",
1632+
"Wait,",
16341633
"Alternatively,",
16351634
"However,",
1636-
"Wait,"
1635+
"Additionally,",
16371636
],
16381637
"prefill": "This requires careful analysis. Let me think through it systematically..."
16391638
}

optillm/thinkdeeper.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ def reasoning_effort(self, messages) -> str:
135135

136136
# Handle EOS token
137137
if next_token == self.model.config.eos_token_id:
138+
logger.debug("Found eos token")
138139
if seen_end_think:
139140
logger.debug("Reached EOS after end think token - stopping generation")
140141
response_chunks.append(next_str)
@@ -150,11 +151,12 @@ def reasoning_effort(self, messages) -> str:
150151
self.thought_count += 1
151152
continue
152153
else:
153-
# Force end think token if we haven't seen it
154-
logger.debug("Reached EOS without end think token - adding end token")
154+
# Force end think token and continue generating for natural conclusion
155+
logger.debug("Reached EOS without end think token - adding end token and continuing generation")
155156
response_chunks.append(self.tokenizer.decode([self.end_think_token]))
156-
response_chunks.append(next_str)
157-
break
157+
tokens = torch.tensor([[self.end_think_token]]).to(tokens.device)
158+
seen_end_think = True
159+
continue
158160

159161
# Normal token processing
160162
response_chunks.append(next_str)

0 commit comments

Comments
 (0)