From 765f0aa43c35337b8bd43c1eecf6ed7f7f9e5787 Mon Sep 17 00:00:00 2001 From: CellenLee <99465814+CellenLee@users.noreply.github.com> Date: Sun, 28 Sep 2025 19:56:37 +0800 Subject: [PATCH 1/2] chore: format fix --- src/api/providers/featherless.ts | 7 ++++++- src/core/tools/codebaseSearchTool.ts | 2 +- src/services/tree-sitter/queries/c-sharp.ts | 2 -- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/api/providers/featherless.ts b/src/api/providers/featherless.ts index 56d7177de7..2a985e2a87 100644 --- a/src/api/providers/featherless.ts +++ b/src/api/providers/featherless.ts @@ -1,4 +1,9 @@ -import { DEEP_SEEK_DEFAULT_TEMPERATURE, type FeatherlessModelId, featherlessDefaultModelId, featherlessModels } from "@roo-code/types" +import { + DEEP_SEEK_DEFAULT_TEMPERATURE, + type FeatherlessModelId, + featherlessDefaultModelId, + featherlessModels, +} from "@roo-code/types" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts index 700d1b7c7c..263fa8755e 100644 --- a/src/core/tools/codebaseSearchTool.ts +++ b/src/core/tools/codebaseSearchTool.ts @@ -17,7 +17,7 @@ export async function codebaseSearchTool( removeClosingTag: RemoveClosingTag, ) { const toolName = "codebase_search" - const workspacePath = (cline.cwd && cline.cwd.trim() !== '') ? cline.cwd : getWorkspacePath() + const workspacePath = cline.cwd && cline.cwd.trim() !== "" ? cline.cwd : getWorkspacePath() if (!workspacePath) { // This case should ideally not happen if Cline is initialized correctly diff --git a/src/services/tree-sitter/queries/c-sharp.ts b/src/services/tree-sitter/queries/c-sharp.ts index 350c24fff6..46f9651b36 100644 --- a/src/services/tree-sitter/queries/c-sharp.ts +++ b/src/services/tree-sitter/queries/c-sharp.ts @@ -63,5 +63,3 @@ export default ` ; LINQ expressions (query_expression) @definition.linq_expression ` - - \ No newline at end of file From 54820c92deada80992f86c8b7848c22e5858bae5 Mon Sep 17 00:00:00 2001 From: CellenLee <99465814+CellenLee@users.noreply.github.com> Date: Sun, 28 Sep 2025 20:04:03 +0800 Subject: [PATCH 2/2] feat: add prompt_cache_key --- src/api/index.ts | 3 ++- src/api/providers/anthropic-vertex.ts | 2 +- src/api/providers/anthropic.ts | 2 +- .../providers/base-openai-compatible-provider.ts | 6 +++++- src/api/providers/bedrock.ts | 2 +- src/api/providers/cerebras.ts | 4 +++- src/api/providers/chutes.ts | 11 +++++++++-- src/api/providers/deepinfra.ts | 11 +++++------ src/api/providers/fake-ai.ts | 4 ++-- src/api/providers/featherless.ts | 14 +++++++++++--- src/api/providers/gemini.ts | 2 +- src/api/providers/glama.ts | 4 +++- src/api/providers/huggingface.ts | 6 +++++- src/api/providers/human-relay.ts | 2 +- src/api/providers/lite-llm.ts | 6 +++++- src/api/providers/lm-studio.ts | 6 +++++- src/api/providers/mistral.ts | 2 +- src/api/providers/native-ollama.ts | 2 +- src/api/providers/ollama.ts | 6 +++++- src/api/providers/openai-native.ts | 4 +++- src/api/providers/openai.ts | 15 +++++++++++++-- src/api/providers/openrouter.ts | 9 +++++++-- src/api/providers/qwen-code.ts | 14 +++++++++++--- src/api/providers/requesty.ts | 6 +++++- src/api/providers/unbound.ts | 4 +++- src/api/providers/vercel-ai-gateway.ts | 6 +++++- src/api/providers/vscode-lm.ts | 4 ++-- src/api/providers/xai.ts | 6 +++++- src/core/task/Task.ts | 1 + 29 files changed, 122 insertions(+), 42 deletions(-) diff --git a/src/api/index.ts b/src/api/index.ts index ac00967676..f19fd3c74f 100644 --- 
a/src/api/index.ts +++ b/src/api/index.ts @@ -44,7 +44,7 @@ import { import { NativeOllamaHandler } from "./providers/native-ollama" export interface SingleCompletionHandler { - completePrompt(prompt: string): Promise + completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise } export interface ApiHandlerCreateMessageMetadata { @@ -65,6 +65,7 @@ export interface ApiHandlerCreateMessageMetadata { * @default true */ store?: boolean + safetyIdentifier?: string } export interface ApiHandler { diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index c70a15926d..b5597b5771 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -175,7 +175,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params } } - async completePrompt(prompt: string) { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata) { try { let { id, diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index cb48492b60..5c65c17967 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -278,7 +278,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } } - async completePrompt(prompt: string) { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata) { let { id: model, temperature } = this.getModel() const message = await this.client.messages.create({ diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fb6c5d0377..032e1c70b2 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -83,6 +83,8 @@ export abstract class BaseOpenAiCompatibleProvider messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], stream: true, stream_options: { include_usage: true }, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } try { @@ -119,13 +121,15 @@ export abstract class BaseOpenAiCompatibleProvider } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const { id: modelId } = this.getModel() try { const response = await this.client.chat.completions.create({ model: modelId, messages: [{ role: "user", content: prompt }], + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, }) return response.choices[0]?.message.content || "" diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index c6a0b35df4..1be3c13eaf 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -633,7 +633,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const modelConfig = this.getModel() diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index a0421844e8..04190ec6c4 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -277,7 +277,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: 
string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const { id: model } = this.getModel() // Prepare request body for non-streaming completion @@ -285,6 +285,8 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan model, messages: [{ role: "user", content: prompt }], stream: false, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } try { diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts index 62121bd19d..56fcd8f1f5 100644 --- a/src/api/providers/chutes.ts +++ b/src/api/providers/chutes.ts @@ -9,6 +9,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream } from "../transform/stream" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +import { ApiHandlerCreateMessageMetadata } from ".." export class ChutesHandler extends BaseOpenAiCompatibleProvider { constructor(options: ApiHandlerOptions) { @@ -44,13 +45,19 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { } } - override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { const model = this.getModel() if (model.id.includes("DeepSeek-R1")) { const stream = await this.client.chat.completions.create({ ...this.getCompletionParams(systemPrompt, messages), messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]), + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, }) const matcher = new XmlMatcher( @@ -85,7 +92,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { yield processedChunk } } else { - yield* super.createMessage(systemPrompt, messages) + yield* super.createMessage(systemPrompt, messages, metadata) } } diff --git a/src/api/providers/deepinfra.ts b/src/api/providers/deepinfra.ts index 7cf018b069..1dde159c93 100644 --- a/src/api/providers/deepinfra.ts +++ b/src/api/providers/deepinfra.ts @@ -60,10 +60,6 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion // Ensure we have up-to-date model metadata await this.fetchModel() const { id: modelId, info, reasoningEffort: reasoning_effort } = await this.fetchModel() - let prompt_cache_key = undefined - if (info.supportsPromptCache && _metadata?.taskId) { - prompt_cache_key = _metadata.taskId - } const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelId, @@ -71,7 +67,8 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion stream: true, stream_options: { include_usage: true }, reasoning_effort, - prompt_cache_key, + prompt_cache_key: _metadata?.taskId, + safety_identifier: _metadata?.safetyIdentifier, } as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming if (this.supportsTemperature(modelId)) { @@ -106,13 +103,15 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { await this.fetchModel() const { id: modelId, info } = this.getModel() const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: [{ role: "user", content: prompt }], + prompt_cache_key: metadata?.taskId, + 
safety_identifier: metadata?.safetyIdentifier, } if (this.supportsTemperature(modelId)) { requestOptions.temperature = this.options.modelTemperature ?? 0 diff --git a/src/api/providers/fake-ai.ts b/src/api/providers/fake-ai.ts index c73752fc66..14b79c4411 100644 --- a/src/api/providers/fake-ai.ts +++ b/src/api/providers/fake-ai.ts @@ -28,7 +28,7 @@ interface FakeAI { ): ApiStream getModel(): { id: string; info: ModelInfo } countTokens(content: Array): Promise - completePrompt(prompt: string): Promise + completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise } /** @@ -75,7 +75,7 @@ export class FakeAIHandler implements ApiHandler, SingleCompletionHandler { return this.ai.countTokens(content) } - completePrompt(prompt: string): Promise { + completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { return this.ai.completePrompt(prompt) } } diff --git a/src/api/providers/featherless.ts b/src/api/providers/featherless.ts index 2a985e2a87..009614508c 100644 --- a/src/api/providers/featherless.ts +++ b/src/api/providers/featherless.ts @@ -14,6 +14,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream } from "../transform/stream" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +import { ApiHandlerCreateMessageMetadata } from ".." export class FeatherlessHandler extends BaseOpenAiCompatibleProvider { constructor(options: ApiHandlerOptions) { @@ -31,6 +32,7 @@ export class FeatherlessHandler extends BaseOpenAiCompatibleProvider { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const { id: model } = this.getModel() diff --git a/src/api/providers/glama.ts b/src/api/providers/glama.ts index 774d615709..3517fb22ed 100644 --- a/src/api/providers/glama.ts +++ b/src/api/providers/glama.ts @@ -116,13 +116,15 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const { id: modelId, info } = await this.fetchModel() try { const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: [{ role: "user", content: prompt }], + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } if (this.supportsTemperature(modelId)) { diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 7b62046b99..94878c47ad 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -59,6 +59,8 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], stream: true, stream_options: { include_usage: true }, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // Add max_tokens if specified @@ -93,13 +95,15 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" try { const response = await this.client.chat.completions.create({ model: modelId, messages: [{ role: "user", content: prompt }], + prompt_cache_key: metadata?.taskId, + 
safety_identifier: metadata?.safetyIdentifier, }) return response.choices[0]?.message.content || "" diff --git a/src/api/providers/human-relay.ts b/src/api/providers/human-relay.ts index c1dc3506e9..0f755cb984 100644 --- a/src/api/providers/human-relay.ts +++ b/src/api/providers/human-relay.ts @@ -82,7 +82,7 @@ export class HumanRelayHandler implements ApiHandler, SingleCompletionHandler { * Implementation of a single prompt * @param prompt Prompt content */ - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { // Copy to clipboard await vscode.env.clipboard.writeText(prompt) diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts index 9f58f09223..31818e18c7 100644 --- a/src/api/providers/lite-llm.ts +++ b/src/api/providers/lite-llm.ts @@ -123,6 +123,8 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa stream_options: { include_usage: true, }, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // GPT-5 models require max_completion_tokens instead of the deprecated max_tokens parameter @@ -191,7 +193,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const { id: modelId, info } = await this.fetchModel() // Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens @@ -201,6 +203,8 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: [{ role: "user", content: prompt }], + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } if (this.supportsTemperature(modelId)) { diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 6c58a96ae1..782c54e9b2 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -87,6 +87,8 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan messages: openAiMessages, temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE, stream: true, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) { @@ -159,7 +161,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { // Create params object with optional draft model const params: any = { @@ -167,6 +169,8 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan messages: [{ role: "user", content: prompt }], temperature: this.options.modelTemperature ?? 
LMSTUDIO_DEFAULT_TEMPERATURE, stream: false, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // Add draft model if speculative decoding is enabled and a draft model is specified diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index fef215d43f..69113a3444 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -104,7 +104,7 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand return { id, info, maxTokens, temperature } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const { id: model, temperature } = this.getModel() diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts index 83a5c7b36e..39718419cf 100644 --- a/src/api/providers/native-ollama.ts +++ b/src/api/providers/native-ollama.ts @@ -280,7 +280,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const client = this.ensureClient() const { id: modelId } = await this.fetchModel() diff --git a/src/api/providers/ollama.ts b/src/api/providers/ollama.ts index ab9df116aa..745b48f8eb 100644 --- a/src/api/providers/ollama.ts +++ b/src/api/providers/ollama.ts @@ -64,6 +64,8 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl temperature: this.options.modelTemperature ?? 0, stream: true, stream_options: { include_usage: true }, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, }) } catch (error) { throw handleOpenAIError(error, this.providerName) @@ -109,7 +111,7 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const modelId = this.getModel().id const useR1Format = modelId.toLowerCase().includes("deepseek-r1") @@ -122,6 +124,8 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl : [{ role: "user", content: prompt }], temperature: this.options.modelTemperature ?? (useR1Format ? 
DEEP_SEEK_DEFAULT_TEMPERATURE : 0), stream: false, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, }) } catch (error) { throw handleOpenAIError(error, this.providerName) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 8a205a06b4..9f9f3e83dc 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -1268,7 +1268,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.lastResponseId = responseId } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const model = this.getModel() const { verbosity, reasoning } = model @@ -1287,6 +1287,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ], stream: false, // Non-streaming for completePrompt store: false, // Don't store prompt completions + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // Include service tier if selected and supported diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index aebe671712..8a3bb19895 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -95,7 +95,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const ark = modelUrl.includes(".volces.com") if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { - yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages) + yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata) return } @@ -164,6 +164,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl stream: true as const, ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), ...(reasoning && reasoning), + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // Add max_tokens if needed @@ -231,6 +233,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl : enabledLegacyFormat ? 
[systemMessage, ...convertToSimpleMessages(messages)] : [systemMessage, ...convertToOpenAiMessages(messages)], + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // Add max_tokens if needed @@ -272,7 +276,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl return { id, info, ...params } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) const model = this.getModel() @@ -281,6 +285,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: model.id, messages: [{ role: "user", content: prompt }], + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // Add max_tokens if needed @@ -310,6 +316,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl modelId: string, systemPrompt: string, messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { const modelInfo = this.getModel().info const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) @@ -330,6 +337,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, temperature: undefined, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // O3 family models do not support the deprecated max_tokens parameter @@ -360,6 +369,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, temperature: undefined, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } // O3 family models do not support the deprecated max_tokens parameter diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 580b173311..a0d8cadd85 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -24,7 +24,7 @@ import { getModelEndpoints } from "./fetchers/modelEndpointCache" import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" -import type { SingleCompletionHandler } from "../index" +import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" import { handleOpenAIError } from "./utils/openai-error-handler" // Image generation types @@ -101,6 +101,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, ): AsyncGenerator { const model = await this.fetchModel() @@ -161,6 +162,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }), ...(transforms && { transforms }), ...(reasoning && { reasoning }), + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } let stream @@ -245,7 +248,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH return { id, info, topP: isDeepSeekR1 ? 
0.95 : undefined, ...params } } - async completePrompt(prompt: string) { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata) { let { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel() const completionParams: OpenRouterChatCompletionParams = { @@ -264,6 +267,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }, }), ...(reasoning && { reasoning }), + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } let response diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index d930d9dfc7..529432ea8a 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -12,7 +12,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" -import type { SingleCompletionHandler } from "../index" +import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" const QWEN_OAUTH_BASE_URL = "https://chat.qwen.ai" const QWEN_OAUTH_TOKEN_ENDPOINT = `${QWEN_OAUTH_BASE_URL}/api/v1/oauth2/token` @@ -201,7 +201,11 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan } } - override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { await this.ensureAuthenticated() const client = this.ensureClient() const model = this.getModel() @@ -220,6 +224,8 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan stream: true, stream_options: { include_usage: true }, max_completion_tokens: model.info.maxTokens, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } const stream = await this.callApiWithRetry(() => client.chat.completions.create(requestOptions)) @@ -290,7 +296,7 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan return { id, info } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { await this.ensureAuthenticated() const client = this.ensureClient() const model = this.getModel() @@ -299,6 +305,8 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan model: model.id, messages: [{ role: "user", content: prompt }], max_completion_tokens: model.info.maxTokens, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } const response = await this.callApiWithRetry(() => client.chat.completions.create(requestOptions)) diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 16aefae528..aeb1318b93 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -128,6 +128,8 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan stream: true, stream_options: { include_usage: true }, requesty: { trace_id: metadata?.taskId, extra: { mode: metadata?.mode } }, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } let stream @@ -159,7 +161,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: 
ApiHandlerCreateMessageMetadata): Promise { const { id: model, maxTokens: max_tokens, temperature } = await this.fetchModel() let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: prompt }] @@ -169,6 +171,8 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan max_tokens, messages: openAiMessages, temperature: temperature, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } let response: OpenAI.Chat.ChatCompletion diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts index bc85dfd499..f8b8b3d540 100644 --- a/src/api/providers/unbound.ts +++ b/src/api/providers/unbound.ts @@ -132,7 +132,7 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const { id: modelId, info } = await this.fetchModel() try { @@ -142,6 +142,8 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa unbound_metadata: { originApp: ORIGIN_APP, }, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } if (this.supportsTemperature(modelId)) { diff --git a/src/api/providers/vercel-ai-gateway.ts b/src/api/providers/vercel-ai-gateway.ts index be77d35986..f196703f62 100644 --- a/src/api/providers/vercel-ai-gateway.ts +++ b/src/api/providers/vercel-ai-gateway.ts @@ -60,6 +60,8 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp : undefined, max_completion_tokens: info.maxTokens, stream: true, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } const completion = await this.client.chat.completions.create(body) @@ -87,7 +89,7 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp } } - async completePrompt(prompt: string): Promise { + async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise { const { id: modelId, info } = await this.fetchModel() try { @@ -95,6 +97,8 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp model: modelId, messages: [{ role: "user", content: prompt }], stream: false, + prompt_cache_key: metadata?.taskId, + safety_identifier: metadata?.safetyIdentifier, } if (this.supportsTemperature(modelId)) { diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index d8a492f772..ce1b1c6bd5 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ -34,7 +34,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". * // Stream a conversation * const systemPrompt = "You are a helpful assistant"; * const messages = [{ role: "user", content: "Hello!" 
}];
- * for await (const chunk of handler.createMessage(systemPrompt, messages)) {
+ * for await (const chunk of handler.createMessage(systemPrompt, messages, metadata)) {
 *     console.log(chunk);
 * }
 * ```
@@ -536,7 +536,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		}
 	}

-	async completePrompt(prompt: string): Promise<string> {
+	async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise<string> {
 		try {
 			const client = await this.getClient()
 			const response = await client.sendRequest(
diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts
index 7eb6e9866d..964c639df6 100644
--- a/src/api/providers/xai.ts
+++ b/src/api/providers/xai.ts
@@ -63,6 +63,8 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 				stream: true,
 				stream_options: { include_usage: true },
 				...(reasoning && reasoning),
+				prompt_cache_key: metadata?.taskId,
+				safety_identifier: metadata?.safetyIdentifier,
 			})
 		} catch (error) {
 			throw handleOpenAIError(error, this.providerName)
@@ -109,7 +111,7 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 		}
 	}

-	async completePrompt(prompt: string): Promise<string> {
+	async completePrompt(prompt: string, metadata?: ApiHandlerCreateMessageMetadata): Promise<string> {
 		const { id: modelId, reasoning } = this.getModel()

 		try {
@@ -117,6 +119,8 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 				model: modelId,
 				messages: [{ role: "user", content: prompt }],
 				...(reasoning && reasoning),
+				prompt_cache_key: metadata?.taskId,
+				safety_identifier: metadata?.safetyIdentifier,
 			})

 			return response.choices[0]?.message.content || ""
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 2dd9e55c0b..565d146cdf 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2672,6 +2672,7 @@ export class Task extends EventEmitter implements TaskLike {
 			...(previousResponseId && !this.skipPrevResponseIdOnce ? { previousResponseId } : {}),
 			// If a condense just occurred, explicitly suppress continuity fallback for the next call
 			...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}),
+			safetyIdentifier: undefined,
 		}

 		// Reset skip flag after applying (it only affects the immediate next call)
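
For reference, the second commit threads two OpenAI-compatible request fields through the providers: `prompt_cache_key`, set to the task ID so requests belonging to the same task can reuse cached prompt prefixes, and `safety_identifier`, read from the new optional `safetyIdentifier` field on `ApiHandlerCreateMessageMetadata`. Below is a minimal sketch of that mapping outside the Roo-Code classes; it is not part of the patch, and the helper name, endpoint URL, model id, and trimmed metadata type are placeholders.

```typescript
// Minimal sketch, not part of the patch: shows how per-task metadata maps onto the
// two new OpenAI-compatible request fields. Names and endpoint below are placeholders;
// the metadata type is trimmed to the two relevant fields.

interface TaskRequestMetadata {
	taskId: string
	safetyIdentifier?: string
}

// prompt_cache_key groups requests from the same task so a compatible server can reuse
// cached prompt prefixes; safety_identifier is an opaque end-user identifier.
function withCacheAndSafety(
	body: Record<string, unknown>,
	metadata?: TaskRequestMetadata,
): Record<string, unknown> {
	return {
		...body,
		prompt_cache_key: metadata?.taskId,
		safety_identifier: metadata?.safetyIdentifier,
	}
}

// Example: a one-shot completion against a placeholder OpenAI-compatible endpoint (Node 18+ fetch).
async function completePrompt(prompt: string, metadata?: TaskRequestMetadata): Promise<string> {
	const response = await fetch("https://api.example.com/v1/chat/completions", {
		method: "POST",
		headers: {
			"Content-Type": "application/json",
			Authorization: `Bearer ${process.env.EXAMPLE_API_KEY}`,
		},
		body: JSON.stringify(
			withCacheAndSafety(
				{ model: "example-model", messages: [{ role: "user", content: prompt }], stream: false },
				metadata,
			),
		),
	})
	const data = (await response.json()) as { choices?: { message?: { content?: string } }[] }
	return data.choices?.[0]?.message?.content ?? ""
}
```

In the patch itself, each provider forwards `metadata?.taskId` and `metadata?.safetyIdentifier` in the same way from both `createMessage` and `completePrompt`, and `Task.ts` currently passes `safetyIdentifier: undefined`.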