Skip to content

Commit 80b2490

Browse files
committed
refactor: use LLMCapability to decide on chat completion vs responses api
1 parent 8fdeba7 commit 80b2490

23 files changed

+731
-1219
lines changed

src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,12 +130,21 @@ public ChatMessageResponseBody(
130130

131131
public ChatMessageResponseBody withResponse(@NotNull String response) {
132132
try {
133-
for (var item : new CompleteMessageParser().parse(response)) {
133+
var parser = new CompleteMessageParser();
134+
var segments = parser.parse(response);
135+
if (parser.getExtractedThought() != null && !parser.getExtractedThought().isBlank()) {
136+
processThinkingOutput(parser.getExtractedThought());
137+
}
138+
for (var item : segments) {
134139
processResponse(item, false);
135140
currentlyProcessedTextPane = null;
136141
currentlyProcessedEditorPanel = null;
137142
currentlyProcessedMermaidPanel = null;
138143
}
144+
var thoughtProcessPanel = getExistingThoughtProcessPanel();
145+
if (thoughtProcessPanel != null && !thoughtProcessPanel.isFinished()) {
146+
thoughtProcessPanel.setFinished();
147+
}
139148
} catch (Exception e) {
140149
LOG.error("Something went wrong while processing input", e);
141150
}

src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt

Lines changed: 112 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,21 +13,33 @@ import ai.koog.agents.core.environment.ReceivedToolResult
1313
import ai.koog.agents.core.environment.result
1414
import ai.koog.agents.core.feature.handler.tool.ToolCallCompletedContext
1515
import ai.koog.agents.core.feature.handler.tool.ToolCallStartingContext
16+
import ai.koog.agents.core.tools.ToolDescriptor
1617
import ai.koog.agents.core.tools.ToolRegistry
1718
import ai.koog.agents.ext.tool.ExitTool
1819
import ai.koog.agents.ext.tool.shell.ShellCommandConfirmation
1920
import ai.koog.agents.features.eventHandler.feature.handleEvents
2021
import ai.koog.agents.features.tokenizer.feature.MessageTokenizer
2122
import ai.koog.agents.features.tokenizer.feature.tokenizer
23+
import ai.koog.prompt.dsl.Prompt
2224
import ai.koog.prompt.dsl.prompt
25+
import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
26+
import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking
2327
import ai.koog.prompt.executor.clients.LLMClient
28+
import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
29+
import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
30+
import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig
31+
import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary
2432
import ai.koog.prompt.executor.model.PromptExecutor
33+
import ai.koog.prompt.llm.LLMCapability
2534
import ai.koog.prompt.llm.LLMProvider
35+
import ai.koog.prompt.llm.LLModel
2636
import ai.koog.prompt.message.Message
37+
import ai.koog.prompt.params.LLMParams
2738
import ai.koog.prompt.tokenizer.Tokenizer
2839
import com.intellij.openapi.components.service
2940
import com.intellij.openapi.project.Project
3041
import ee.carlrobert.codegpt.EncodingManager
42+
import ee.carlrobert.codegpt.agent.clients.CustomOpenAILLMClient
3143
import ee.carlrobert.codegpt.agent.clients.RetryingPromptExecutor
3244
import ee.carlrobert.codegpt.agent.credits.extractCreditsSnapshot
3345
import ee.carlrobert.codegpt.agent.tools.*
@@ -46,6 +58,8 @@ import kotlin.time.Duration.Companion.seconds
4658
object AgentFactory {
4759

4860
private const val MAX_AGENT_ITERATIONS = 250
61+
private const val ANTHROPIC_MIN_THINKING_BUDGET = 512
62+
private const val ANTHROPIC_DEFAULT_THINKING_BUDGET = 2_048
4963

5064
fun createAgent(
5165
agentType: AgentType,
@@ -170,18 +184,111 @@ object AgentFactory {
170184
featureType: FeatureType = FeatureType.AGENT
171185
): PromptExecutor {
172186
val llmClient = LLMClientFactory.createClient(provider, featureType)
173-
return createRetryingExecutor(llmClient, events)
174-
}
175-
176-
private fun createRetryingExecutor(client: LLMClient, events: AgentEvents?): PromptExecutor {
177187
val policy = RetryingPromptExecutor.RetryPolicy(
178188
maxAttempts = 5,
179189
initialDelay = 1.seconds,
180190
maxDelay = 30.seconds,
181191
backoffMultiplier = 2.0,
182192
jitterFactor = 0.1
183193
)
184-
return RetryingPromptExecutor.fromClient(client, policy, events)
194+
return createRetryingExecutor(llmClient, policy, events)
195+
}
196+
197+
/**
 * Builds a [RetryingPromptExecutor] from [client], [policy] and [events] and returns a
 * [PromptExecutor] wrapper that delegates every call to it.
 *
 * `executeStreaming` and `execute` pass the prompt through `withReasoningParams(model)` before
 * delegating; `moderate`, `models` and `close` are forwarded without modification.
 *
 * @param client LLM client handed to [RetryingPromptExecutor.fromClient].
 * @param policy retry policy handed to [RetryingPromptExecutor.fromClient].
 * @param events optional agent events sink handed to [RetryingPromptExecutor.fromClient].
 * @return the delegating executor described above.
 */
internal fun createRetryingExecutor(
198+
client: LLMClient,
199+
policy: RetryingPromptExecutor.RetryPolicy,
200+
events: AgentEvents?
201+
): PromptExecutor {
202+
val executor = RetryingPromptExecutor.fromClient(client, policy, events)
203+
return object : PromptExecutor {
204+
override fun executeStreaming(
205+
prompt: Prompt,
206+
model: LLModel,
207+
tools: List<ToolDescriptor>
208+
) = executor.executeStreaming(prompt.withReasoningParams(model), model, tools)
209+
210+
override suspend fun execute(
211+
prompt: Prompt,
212+
model: LLModel,
213+
tools: List<ToolDescriptor>
214+
) = executor.execute(prompt.withReasoningParams(model), model, tools)
215+
216+
// Moderation is forwarded with the prompt untouched (no reasoning params applied).
override suspend fun moderate(prompt: Prompt, model: LLModel) =
217+
executor.moderate(prompt, model)
218+
219+
override suspend fun models() = executor.models()
220+
221+
override fun close() = executor.close()
222+
}
223+
}
224+
225+
/**
 * Selects provider-specific reasoning params for [model] and returns the result of
 * `withParams` applied to them.
 *
 * - `LLMProvider.OpenAI`: params go through `withOpenAIReasoning()`.
 * - `CustomOpenAILLMClient.CustomOpenAI`: params go through `withOpenAIReasoning()` only when the
 *   model supports `LLMCapability.OpenAIEndpoint.Responses`; otherwise they are left as they are.
 * - `LLMProvider.Anthropic`: params go through `withAnthropicReasoning()`.
 * - any other provider: params are left as they are.
 *
 * NOTE(review): the OpenAI branch does not check the Responses capability the way the
 * CustomOpenAI branch does — confirm every OpenAI model routed here accepts Responses params.
 */
private fun Prompt.withReasoningParams(model: LLModel): Prompt {
226+
val params = when (model.provider) {
227+
LLMProvider.OpenAI -> params.withOpenAIReasoning()
228+
CustomOpenAILLMClient.CustomOpenAI -> {
229+
if (model.supports(LLMCapability.OpenAIEndpoint.Responses)) {
230+
params.withOpenAIReasoning()
231+
} else {
232+
params
233+
}
234+
}
235+
LLMProvider.Anthropic -> params.withAnthropicReasoning()
236+
else -> params
237+
}
238+
return withParams(params)
239+
}
240+
241+
/**
 * Returns [OpenAIResponsesParams] that carry a reasoning configuration.
 *
 * If the receiver is already an [OpenAIResponsesParams] it is used as the base; otherwise a new
 * [OpenAIResponsesParams] is created from the receiver's `temperature`, `maxTokens`,
 * `numberOfChoices`, `speculation`, `schema`, `toolChoice`, `user` and `additionalProperties`.
 *
 * An existing `reasoning` value on the base is kept; when it is `null` the default is
 * `ReasoningEffort.MEDIUM` with `ReasoningSummary.AUTO`.
 */
private fun LLMParams.withOpenAIReasoning(): LLMParams {
242+
val base = when (this) {
243+
is OpenAIResponsesParams -> this
244+
else -> OpenAIResponsesParams(
245+
temperature = temperature,
246+
maxTokens = maxTokens,
247+
numberOfChoices = numberOfChoices,
248+
speculation = speculation,
249+
schema = schema,
250+
toolChoice = toolChoice,
251+
user = user,
252+
additionalProperties = additionalProperties
253+
)
254+
}
255+
return base.copy(
256+
reasoning = base.reasoning ?: ReasoningConfig(
257+
effort = ReasoningEffort.MEDIUM,
258+
summary = ReasoningSummary.AUTO
259+
)
260+
)
261+
}
262+
263+
/**
 * Returns [AnthropicParams] with thinking enabled when a budget can be resolved.
 *
 * If the receiver is already an [AnthropicParams] it is used as the base; otherwise a new
 * [AnthropicParams] is created from the receiver's `temperature`, `maxTokens`,
 * `numberOfChoices`, `speculation`, `schema`, `toolChoice`, `user` and `additionalProperties`.
 *
 * The base is returned unchanged when it already has a non-null `thinking` value, or when
 * `resolveAnthropicThinkingBudget(base.maxTokens)` returns `null`. Otherwise a copy with
 * `AnthropicThinking.Enabled(budgetTokens = ...)` is returned.
 *
 * NOTE(review): Anthropic's extended-thinking documentation lists restrictions on temperature
 * and forced tool choice while thinking is enabled — confirm the copied `temperature` and
 * `toolChoice` values are accepted together with `thinking`.
 */
private fun LLMParams.withAnthropicReasoning(): LLMParams {
264+
val base = when (this) {
265+
is AnthropicParams -> this
266+
else -> AnthropicParams(
267+
temperature = temperature,
268+
maxTokens = maxTokens,
269+
numberOfChoices = numberOfChoices,
270+
speculation = speculation,
271+
schema = schema,
272+
toolChoice = toolChoice,
273+
user = user,
274+
additionalProperties = additionalProperties
275+
)
276+
}
277+
278+
if (base.thinking != null) return base
279+
280+
val thinkingBudget = resolveAnthropicThinkingBudget(base.maxTokens) ?: return base
281+
return base.copy(thinking = AnthropicThinking.Enabled(budgetTokens = thinkingBudget))
282+
}
283+
284+
/**
 * Derives a thinking-token budget from [maxTokens].
 *
 * The limit is [maxTokens], or `ANTHROPIC_DEFAULT_THINKING_BUDGET` when [maxTokens] is `null`.
 * Returns `null` when the limit is less than or equal to `ANTHROPIC_MIN_THINKING_BUDGET`;
 * otherwise returns half of the limit (integer division), clamped to the range
 * `ANTHROPIC_MIN_THINKING_BUDGET..ANTHROPIC_DEFAULT_THINKING_BUDGET`.
 *
 * NOTE(review): Anthropic's extended-thinking documentation states a 1,024-token minimum for
 * `budget_tokens`; this function can return values down to `ANTHROPIC_MIN_THINKING_BUDGET` —
 * confirm that minimum is accepted by the API.
 */
private fun resolveAnthropicThinkingBudget(maxTokens: Int?): Int? {
285+
val limit = maxTokens ?: ANTHROPIC_DEFAULT_THINKING_BUDGET
286+
if (limit <= ANTHROPIC_MIN_THINKING_BUDGET) {
287+
return null
288+
}
289+
return (limit / 2)
290+
.coerceAtLeast(ANTHROPIC_MIN_THINKING_BUDGET)
291+
.coerceAtMost(ANTHROPIC_DEFAULT_THINKING_BUDGET)
185292
}
186293

187294
private fun createGeneralPurposeAgent(

src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt

Lines changed: 37 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@ import com.intellij.openapi.components.service
2323
import com.intellij.openapi.project.Project
2424
import ee.carlrobert.codegpt.EncodingManager
2525
import ee.carlrobert.codegpt.agent.clients.shouldStream
26-
import ee.carlrobert.codegpt.agent.clients.shouldStreamCustomOpenAI
2726
import ee.carlrobert.codegpt.agent.strategy.CODE_AGENT_COMPRESSION
2827
import ee.carlrobert.codegpt.agent.strategy.HistoryCompressionConfig
2928
import ee.carlrobert.codegpt.agent.strategy.SingleRunStrategyProvider
@@ -35,6 +34,7 @@ import ee.carlrobert.codegpt.settings.hooks.HookManager
3534
import ee.carlrobert.codegpt.settings.models.ModelSettings
3635
import ee.carlrobert.codegpt.settings.service.FeatureType
3736
import ee.carlrobert.codegpt.settings.service.ServiceType
37+
import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings
3838
import ee.carlrobert.codegpt.settings.skills.SkillDiscoveryService
3939
import ee.carlrobert.codegpt.toolwindow.agent.ui.approval.BashPayload
4040
import ee.carlrobert.codegpt.toolwindow.agent.ui.approval.ToolApprovalRequest
@@ -89,7 +89,7 @@ object ProxyAIAgent {
8989
val modelSelection =
9090
service<ModelSettings>().getModelSelectionForFeature(FeatureType.AGENT)
9191
val skills = project.service<SkillDiscoveryService>().listSkills()
92-
val stream = shouldStreamAgentToolLoop(provider)
92+
val stream = shouldStreamAgentToolLoop(project, provider)
9393
val projectInstructions = loadProjectInstructions(project.basePath)
9494
val executor = AgentFactory.createExecutor(provider, events)
9595
val pendingMessageQueue = pendingMessages.getOrPut(sessionId) { ArrayDeque() }
@@ -163,21 +163,43 @@ object ProxyAIAgent {
163163
val toolCallToUiId: MutableMap<String, String> = HashMap()
164164
val anonymousToolIds: ArrayDeque<String> = ArrayDeque()
165165
val frameAdapter = ReasoningFrameTextAdapter()
166+
var streamedReasoningForCurrentNode = false
166167

167168
onLLMStreamingFrameReceived { ctx ->
168169
if (!stream) return@onLLMStreamingFrameReceived
169170

170-
frameAdapter.consume(ctx.streamFrame).forEach { chunk ->
171+
val frameType = ctx.streamFrame::class.simpleName
172+
?: ctx.streamFrame::class.qualifiedName
173+
?: "unknown"
174+
val chunks = frameAdapter.consume(ctx.streamFrame)
175+
if (frameType.contains("Reasoning") && chunks.isNotEmpty()) {
176+
streamedReasoningForCurrentNode = true
177+
}
178+
179+
chunks.forEach { chunk ->
171180
if (chunk.isNotEmpty()) {
172181
events.onTextReceived(chunk)
173182
}
174183
}
175184
}
176185

177186
onNodeExecutionCompleted { ctx ->
178-
if (stream) return@onNodeExecutionCompleted
187+
val output = (ctx.output as? List<*>) ?: emptyList<Any?>()
188+
if (stream) {
189+
if (!streamedReasoningForCurrentNode) {
190+
output.forEach { msg ->
191+
(msg as? Message.Reasoning)?.let {
192+
if (it.content.isNotBlank()) {
193+
events.onTextReceived("<think>${it.content}</think>")
194+
}
195+
}
196+
}
197+
}
198+
streamedReasoningForCurrentNode = false
199+
return@onNodeExecutionCompleted
200+
}
179201

180-
(ctx.output as? List<*>)?.forEach { msg ->
202+
output.forEach { msg ->
181203
(msg as? Message.Assistant)?.let {
182204
events.onTextReceived(it.content)
183205
}
@@ -268,10 +290,19 @@ object ProxyAIAgent {
268290
}
269291

270292
private fun shouldStreamAgentToolLoop(
293+
project: Project,
271294
provider: ServiceType,
272295
): Boolean {
273296
return when (provider) {
274-
ServiceType.CUSTOM_OPENAI -> shouldStreamCustomOpenAI(FeatureType.AGENT)
297+
ServiceType.CUSTOM_OPENAI -> {
298+
val selectedModel =
299+
service<ModelSettings>().getModelSelectionForFeature(FeatureType.AGENT)
300+
val selectedServiceId = selectedModel.serviceId
301+
val selectedService = service<CustomServicesSettings>().state.services
302+
.firstOrNull { it.id == selectedServiceId }
303+
selectedService?.chatCompletionSettings?.shouldStream() == true
304+
}
305+
275306
ServiceType.GOOGLE -> false
276307
else -> true
277308
}

src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@ import kotlinx.serialization.json.JsonElement
77

88
@Serializable
99
class CustomOpenAIChatCompletionRequest(
10-
val messages: List<OpenAIMessage> = emptyList(),
10+
val messages: List<OpenAIMessage>? = null,
11+
val input: JsonElement? = null,
1112
val prompt: String? = null,
1213
override val model: String? = null,
1314
override val stream: Boolean? = null,

0 commit comments

Comments
 (0)