diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index 8b919d4a30..ac3926ac37 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -1,3 +1,5 @@ +export * from "./providers/index.js" + export * from "./api.js" export * from "./codebase-index.js" export * from "./cloud.js" diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts new file mode 100644 index 0000000000..d0f1629ee9 --- /dev/null +++ b/packages/types/src/providers/anthropic.ts @@ -0,0 +1,100 @@ +import type { ModelInfo } from "../model.js" + +// https://docs.anthropic.com/en/docs/about-claude/models + +export type AnthropicModelId = keyof typeof anthropicModels +export const anthropicDefaultModelId: AnthropicModelId = "claude-sonnet-4-20250514" + +export const anthropicModels = { + "claude-sonnet-4-20250514": { + maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false. + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, // $3 per million input tokens + outputPrice: 15.0, // $15 per million output tokens + cacheWritesPrice: 3.75, // $3.75 per million tokens + cacheReadsPrice: 0.3, // $0.30 per million tokens + supportsReasoningBudget: true, + }, + "claude-opus-4-20250514": { + maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 15.0, // $15 per million input tokens + outputPrice: 75.0, // $75 per million output tokens + cacheWritesPrice: 18.75, // $18.75 per million tokens + cacheReadsPrice: 1.5, // $1.50 per million tokens + supportsReasoningBudget: true, + }, + "claude-3-7-sonnet-20250219:thinking": { + maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k. + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, // $3 per million input tokens + outputPrice: 15.0, // $15 per million output tokens + cacheWritesPrice: 3.75, // $3.75 per million tokens + cacheReadsPrice: 0.3, // $0.30 per million tokens + supportsReasoningBudget: true, + requiredReasoningBudget: true, + }, + "claude-3-7-sonnet-20250219": { + maxTokens: 8192, // Since we already have a `:thinking` virtual model we aren't setting `supportsReasoningBudget: true` here. 
+ contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, // $3 per million input tokens + outputPrice: 15.0, // $15 per million output tokens + cacheWritesPrice: 3.75, // $3.75 per million tokens + cacheReadsPrice: 0.3, // $0.30 per million tokens + }, + "claude-3-5-sonnet-20241022": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, // $3 per million input tokens + outputPrice: 15.0, // $15 per million output tokens + cacheWritesPrice: 3.75, // $3.75 per million tokens + cacheReadsPrice: 0.3, // $0.30 per million tokens + }, + "claude-3-5-haiku-20241022": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.0, + outputPrice: 5.0, + cacheWritesPrice: 1.25, + cacheReadsPrice: 0.1, + }, + "claude-3-opus-20240229": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 15.0, + outputPrice: 75.0, + cacheWritesPrice: 18.75, + cacheReadsPrice: 1.5, + }, + "claude-3-haiku-20240307": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.25, + outputPrice: 1.25, + cacheWritesPrice: 0.3, + cacheReadsPrice: 0.03, + }, +} as const satisfies Record<string, ModelInfo> + +export const ANTHROPIC_DEFAULT_MAX_TOKENS = 8192 diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts new file mode 100644 index 0000000000..f40dc8c8f6 --- /dev/null +++ b/packages/types/src/providers/bedrock.ts @@ -0,0 +1,432 @@ +import type { ModelInfo } from "../model.js" + +// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html + +export type BedrockModelId = keyof typeof bedrockModels + +export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-sonnet-4-20250514-v1:0" + +export const bedrockDefaultPromptRouterModelId: BedrockModelId = "anthropic.claude-3-sonnet-20240229-v1:0" + +// March 12, 2025 - updated prices to match US-West-2 list price shown at +// https://aws.amazon.com/bedrock/pricing, including older models that are part +// of the default prompt routers AWS enabled for GA of the prompt router +// feature.
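(Aside, not part of the diff: the per-million-dollar comments in the Anthropic table above follow the same convention used throughout these files, so a consumer can turn any table into a request cost. A minimal sketch, assuming token counts are already split into disjoint input/output/cache buckets; `estimateCost` is a hypothetical helper, not an export of this package.)

import { anthropicModels, type AnthropicModelId } from "@roo-code/types"

// Prices in ModelInfo are USD per million tokens, so scale each bucket by price / 1e6.
function estimateCost(
	modelId: AnthropicModelId,
	tokens: { input: number; output: number; cacheWrites?: number; cacheReads?: number },
): number {
	const info = anthropicModels[modelId]
	const perToken = (price?: number) => (price ?? 0) / 1_000_000
	return (
		tokens.input * perToken(info.inputPrice) +
		tokens.output * perToken(info.outputPrice) +
		(tokens.cacheWrites ?? 0) * perToken(info.cacheWritesPrice) +
		(tokens.cacheReads ?? 0) * perToken(info.cacheReadsPrice)
	)
}

// e.g. 10k input + 2k output on the default model: 10_000 * 3/1e6 + 2_000 * 15/1e6 = $0.06.
estimateCost("claude-sonnet-4-20250514", { input: 10_000, output: 2_000 })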
+export const bedrockModels = { + "amazon.nova-pro-v1:0": { + maxTokens: 5000, + contextWindow: 300_000, + supportsImages: true, + supportsComputerUse: false, + supportsPromptCache: true, + inputPrice: 0.8, + outputPrice: 3.2, + cacheWritesPrice: 0.8, // per million tokens + cacheReadsPrice: 0.2, // per million tokens + minTokensPerCachePoint: 1, + maxCachePoints: 1, + cachableFields: ["system"], + }, + "amazon.nova-pro-latency-optimized-v1:0": { + maxTokens: 5000, + contextWindow: 300_000, + supportsImages: true, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 1.0, + outputPrice: 4.0, + cacheWritesPrice: 1.0, // per million tokens + cacheReadsPrice: 0.25, // per million tokens + description: "Amazon Nova Pro with latency optimized inference", + }, + "amazon.nova-lite-v1:0": { + maxTokens: 5000, + contextWindow: 300_000, + supportsImages: true, + supportsComputerUse: false, + supportsPromptCache: true, + inputPrice: 0.06, + outputPrice: 0.24, + cacheWritesPrice: 0.06, // per million tokens + cacheReadsPrice: 0.015, // per million tokens + minTokensPerCachePoint: 1, + maxCachePoints: 1, + cachableFields: ["system"], + }, + "amazon.nova-micro-v1:0": { + maxTokens: 5000, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: true, + inputPrice: 0.035, + outputPrice: 0.14, + cacheWritesPrice: 0.035, // per million tokens + cacheReadsPrice: 0.00875, // per million tokens + minTokensPerCachePoint: 1, + maxCachePoints: 1, + cachableFields: ["system"], + }, + "anthropic.claude-sonnet-4-20250514-v1:0": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + }, + "anthropic.claude-opus-4-20250514-v1:0": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 15.0, + outputPrice: 75.0, + cacheWritesPrice: 18.75, + cacheReadsPrice: 1.5, + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + }, + "anthropic.claude-3-7-sonnet-20250219-v1:0": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + }, + "anthropic.claude-3-5-sonnet-20241022-v2:0": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + }, + "anthropic.claude-3-5-haiku-20241022-v1:0": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.8, + outputPrice: 4.0, + cacheWritesPrice: 1.0, + cacheReadsPrice: 0.08, + minTokensPerCachePoint: 2048, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + }, + "anthropic.claude-3-5-sonnet-20240620-v1:0": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 
3.0, + outputPrice: 15.0, + }, + "anthropic.claude-3-opus-20240229-v1:0": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 15.0, + outputPrice: 75.0, + }, + "anthropic.claude-3-sonnet-20240229-v1:0": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 3.0, + outputPrice: 15.0, + }, + "anthropic.claude-3-haiku-20240307-v1:0": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0.25, + outputPrice: 1.25, + }, + "anthropic.claude-2-1-v1:0": { + maxTokens: 4096, + contextWindow: 100_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 8.0, + outputPrice: 24.0, + description: "Claude 2.1", + }, + "anthropic.claude-2-0-v1:0": { + maxTokens: 4096, + contextWindow: 100_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 8.0, + outputPrice: 24.0, + description: "Claude 2.0", + }, + "anthropic.claude-instant-v1:0": { + maxTokens: 4096, + contextWindow: 100_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.8, + outputPrice: 2.4, + description: "Claude Instant", + }, + "deepseek.r1-v1:0": { + maxTokens: 32_768, + contextWindow: 128_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 1.35, + outputPrice: 5.4, + }, + "meta.llama3-3-70b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.72, + outputPrice: 0.72, + description: "Llama 3.3 Instruct (70B)", + }, + "meta.llama3-2-90b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: true, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.72, + outputPrice: 0.72, + description: "Llama 3.2 Instruct (90B)", + }, + "meta.llama3-2-11b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: true, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.16, + outputPrice: 0.16, + description: "Llama 3.2 Instruct (11B)", + }, + "meta.llama3-2-3b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.15, + outputPrice: 0.15, + description: "Llama 3.2 Instruct (3B)", + }, + "meta.llama3-2-1b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.1, + outputPrice: 0.1, + description: "Llama 3.2 Instruct (1B)", + }, + "meta.llama3-1-405b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 2.4, + outputPrice: 2.4, + description: "Llama 3.1 Instruct (405B)", + }, + "meta.llama3-1-70b-instruct-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.72, + outputPrice: 0.72, + description: "Llama 3.1 Instruct (70B)", + }, + "meta.llama3-1-70b-instruct-latency-optimized-v1:0": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.9, + outputPrice: 0.9, + description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)", + }, + "meta.llama3-1-8b-instruct-v1:0": { + 
maxTokens: 8192, + contextWindow: 8_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.22, + outputPrice: 0.22, + description: "Llama 3.1 Instruct (8B)", + }, + "meta.llama3-70b-instruct-v1:0": { + maxTokens: 2048, + contextWindow: 8_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 2.65, + outputPrice: 3.5, + }, + "meta.llama3-8b-instruct-v1:0": { + maxTokens: 2048, + contextWindow: 4_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.3, + outputPrice: 0.6, + }, + "amazon.titan-text-lite-v1:0": { + maxTokens: 4096, + contextWindow: 8_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.15, + outputPrice: 0.2, + description: "Amazon Titan Text Lite", + }, + "amazon.titan-text-express-v1:0": { + maxTokens: 4096, + contextWindow: 8_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.2, + outputPrice: 0.6, + description: "Amazon Titan Text Express", + }, + "amazon.titan-text-embeddings-v1:0": { + maxTokens: 8192, + contextWindow: 8_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.1, + description: "Amazon Titan Text Embeddings", + }, + "amazon.titan-text-embeddings-v2:0": { + maxTokens: 8192, + contextWindow: 8_000, + supportsImages: false, + supportsComputerUse: false, + supportsPromptCache: false, + inputPrice: 0.02, + description: "Amazon Titan Text Embeddings V2", + }, +} as const satisfies Record<string, ModelInfo> + +export const BEDROCK_DEFAULT_TEMPERATURE = 0.3 + +export const BEDROCK_MAX_TOKENS = 4096 + +export const BEDROCK_REGION_INFO: Record< + string, + { + regionId: string + description: string + pattern?: string + multiRegion?: boolean + } +> = { + /* + * This JSON was generated by AWS's AI assistant (Amazon Q) on March 29, 2025 + * + * - Africa (Cape Town) region does not appear to support Amazon Bedrock at this time. + * - Some Asia Pacific regions, such as Asia Pacific (Hong Kong) and Asia Pacific (Jakarta), are not listed among the supported regions for Bedrock services. + * - Middle East regions, including Middle East (Bahrain) and Middle East (UAE), are not mentioned in the list of supported regions for Bedrock. + * - China regions (Beijing and Ningxia) are not listed as supported for Amazon Bedrock. + * - Some newer or specialized AWS regions may not have Bedrock support yet. + */ + "us.": { regionId: "us-east-1", description: "US East (N. Virginia)", pattern: "us-", multiRegion: true }, + "use.": { regionId: "us-east-1", description: "US East (N. Virginia)" }, + "use1.": { regionId: "us-east-1", description: "US East (N. 
Virginia)" }, + "use2.": { regionId: "us-east-2", description: "US East (Ohio)" }, + "usw.": { regionId: "us-west-2", description: "US West (Oregon)" }, + "usw2.": { regionId: "us-west-2", description: "US West (Oregon)" }, + "ug.": { + regionId: "us-gov-west-1", + description: "AWS GovCloud (US-West)", + pattern: "us-gov-", + multiRegion: true, + }, + "uge1.": { regionId: "us-gov-east-1", description: "AWS GovCloud (US-East)" }, + "ugw1.": { regionId: "us-gov-west-1", description: "AWS GovCloud (US-West)" }, + "eu.": { regionId: "eu-west-1", description: "Europe (Ireland)", pattern: "eu-", multiRegion: true }, + "euw1.": { regionId: "eu-west-1", description: "Europe (Ireland)" }, + "euw2.": { regionId: "eu-west-2", description: "Europe (London)" }, + "euw3.": { regionId: "eu-west-3", description: "Europe (Paris)" }, + "euc1.": { regionId: "eu-central-1", description: "Europe (Frankfurt)" }, + "euc2.": { regionId: "eu-central-2", description: "Europe (Zurich)" }, + "eun1.": { regionId: "eu-north-1", description: "Europe (Stockholm)" }, + "eus1.": { regionId: "eu-south-1", description: "Europe (Milan)" }, + "eus2.": { regionId: "eu-south-2", description: "Europe (Spain)" }, + "ap.": { + regionId: "ap-southeast-1", + description: "Asia Pacific (Singapore)", + pattern: "ap-", + multiRegion: true, + }, + "ape1.": { regionId: "ap-east-1", description: "Asia Pacific (Hong Kong)" }, + "apne1.": { regionId: "ap-northeast-1", description: "Asia Pacific (Tokyo)" }, + "apne2.": { regionId: "ap-northeast-2", description: "Asia Pacific (Seoul)" }, + "apne3.": { regionId: "ap-northeast-3", description: "Asia Pacific (Osaka)" }, + "aps1.": { regionId: "ap-south-1", description: "Asia Pacific (Mumbai)" }, + "aps2.": { regionId: "ap-south-2", description: "Asia Pacific (Hyderabad)" }, + "apse1.": { regionId: "ap-southeast-1", description: "Asia Pacific (Singapore)" }, + "apse2.": { regionId: "ap-southeast-2", description: "Asia Pacific (Sydney)" }, + "ca.": { regionId: "ca-central-1", description: "Canada (Central)", pattern: "ca-", multiRegion: true }, + "cac1.": { regionId: "ca-central-1", description: "Canada (Central)" }, + "sa.": { regionId: "sa-east-1", description: "South America (São Paulo)", pattern: "sa-", multiRegion: true }, + "sae1.": { regionId: "sa-east-1", description: "South America (São Paulo)" }, + + // These are not official - they weren't generated by Amazon Q nor were + // found in the AWS documentation but another Roo contributor found apac. + // Was needed so I've added the pattern of the other geo zones. 
+ "apac.": { regionId: "ap-southeast-1", description: "Default APAC region", pattern: "ap-", multiRegion: true }, + "emea.": { regionId: "eu-west-1", description: "Default EMEA region", pattern: "eu-", multiRegion: true }, + "amer.": { regionId: "us-east-1", description: "Default Americas region", pattern: "us-", multiRegion: true }, +} + +export const BEDROCK_REGIONS = Object.values(BEDROCK_REGION_INFO) + // Extract all region IDs + .map((info) => ({ value: info.regionId, label: info.regionId })) + // Filter to unique region IDs (remove duplicates) + .filter((region, index, self) => index === self.findIndex((r) => r.value === region.value)) + // Sort alphabetically by region ID + .sort((a, b) => a.value.localeCompare(b.value)) diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts new file mode 100644 index 0000000000..524f842059 --- /dev/null +++ b/packages/types/src/providers/chutes.ts @@ -0,0 +1,229 @@ +import type { ModelInfo } from "../model.js" + +// https://llm.chutes.ai/v1 (OpenAI compatible) +export type ChutesModelId = + | "deepseek-ai/DeepSeek-R1-0528" + | "deepseek-ai/DeepSeek-R1" + | "deepseek-ai/DeepSeek-V3" + | "unsloth/Llama-3.3-70B-Instruct" + | "chutesai/Llama-4-Scout-17B-16E-Instruct" + | "unsloth/Mistral-Nemo-Instruct-2407" + | "unsloth/gemma-3-12b-it" + | "NousResearch/DeepHermes-3-Llama-3-8B-Preview" + | "unsloth/gemma-3-4b-it" + | "nvidia/Llama-3_3-Nemotron-Super-49B-v1" + | "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1" + | "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8" + | "deepseek-ai/DeepSeek-V3-Base" + | "deepseek-ai/DeepSeek-R1-Zero" + | "deepseek-ai/DeepSeek-V3-0324" + | "Qwen/Qwen3-235B-A22B" + | "Qwen/Qwen3-32B" + | "Qwen/Qwen3-30B-A3B" + | "Qwen/Qwen3-14B" + | "Qwen/Qwen3-8B" + | "microsoft/MAI-DS-R1-FP8" + | "tngtech/DeepSeek-R1T-Chimera" + +export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1-0528" + +export const chutesModels = { + "deepseek-ai/DeepSeek-R1-0528": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek R1 0528 model.", + }, + "deepseek-ai/DeepSeek-R1": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek R1 model.", + }, + "deepseek-ai/DeepSeek-V3": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3 model.", + }, + "unsloth/Llama-3.3-70B-Instruct": { + maxTokens: 32768, // From Groq + contextWindow: 131072, // From Groq + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Unsloth Llama 3.3 70B Instruct model.", + }, + "chutesai/Llama-4-Scout-17B-16E-Instruct": { + maxTokens: 32768, + contextWindow: 512000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context.", + }, + "unsloth/Mistral-Nemo-Instruct-2407": { + maxTokens: 32768, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Unsloth Mistral Nemo Instruct model.", + }, + "unsloth/gemma-3-12b-it": { + maxTokens: 32768, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Unsloth Gemma 3 12B IT 
model.", + }, + "NousResearch/DeepHermes-3-Llama-3-8B-Preview": { + maxTokens: 32768, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Nous DeepHermes 3 Llama 3 8B Preview model.", + }, + "unsloth/gemma-3-4b-it": { + maxTokens: 32768, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Unsloth Gemma 3 4B IT model.", + }, + "nvidia/Llama-3_3-Nemotron-Super-49B-v1": { + maxTokens: 32768, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Nvidia Llama 3.3 Nemotron Super 49B model.", + }, + "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": { + maxTokens: 32768, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Nvidia Llama 3.1 Nemotron Ultra 253B model.", + }, + "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": { + maxTokens: 32768, + contextWindow: 256000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model.", + }, + "deepseek-ai/DeepSeek-V3-Base": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3 Base model.", + }, + "deepseek-ai/DeepSeek-R1-Zero": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek R1 Zero model.", + }, + "deepseek-ai/DeepSeek-V3-0324": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3 (0324) model.", + }, + "Qwen/Qwen3-235B-A22B": { + maxTokens: 32768, + contextWindow: 40960, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Qwen3 235B A22B model.", + }, + "Qwen/Qwen3-32B": { + maxTokens: 32768, + contextWindow: 40960, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Qwen3 32B model.", + }, + "Qwen/Qwen3-30B-A3B": { + maxTokens: 32768, + contextWindow: 40960, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Qwen3 30B A3B model.", + }, + "Qwen/Qwen3-14B": { + maxTokens: 32768, + contextWindow: 40960, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Qwen3 14B model.", + }, + "Qwen/Qwen3-8B": { + maxTokens: 32768, + contextWindow: 40960, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Qwen3 8B model.", + }, + "microsoft/MAI-DS-R1-FP8": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Microsoft MAI-DS-R1 FP8 model.", + }, + "tngtech/DeepSeek-R1T-Chimera": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "TNGTech DeepSeek R1T Chimera model.", + }, +} as const satisfies Record diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts new file mode 100644 index 0000000000..5ef757ffdf --- /dev/null +++ 
b/packages/types/src/providers/deepseek.ts @@ -0,0 +1,33 @@ +import type { ModelInfo } from "../model.js" + +// https://platform.deepseek.com/docs/api +export type DeepSeekModelId = keyof typeof deepSeekModels + +export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat" + +export const deepSeekModels = { + "deepseek-chat": { + maxTokens: 8192, + contextWindow: 64_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.27, // $0.27 per million tokens (cache miss) + outputPrice: 1.1, // $1.10 per million tokens + cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss) + cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit). + description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`, + }, + "deepseek-reasoner": { + maxTokens: 8192, + contextWindow: 64_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.55, // $0.55 per million tokens (cache miss) + outputPrice: 2.19, // $2.19 per million tokens + cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss) + cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit) + description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`, + }, +} as const satisfies Record<string, ModelInfo> + +export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6 diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts new file mode 100644 index 0000000000..2ddf594704 --- /dev/null +++ b/packages/types/src/providers/gemini.ts @@ -0,0 +1,221 @@ +import type { ModelInfo } from "../model.js" + +// https://ai.google.dev/gemini-api/docs/models/gemini +export type GeminiModelId = keyof typeof geminiModels + +export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001" + +export const geminiModels = { + "gemini-2.5-flash-preview-04-17:thinking": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0.15, + outputPrice: 3.5, + maxThinkingTokens: 24_576, + supportsReasoningBudget: true, + requiredReasoningBudget: true, + }, + "gemini-2.5-flash-preview-04-17": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0.15, + outputPrice: 0.6, + }, + "gemini-2.5-flash-preview-05-20:thinking": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 3.5, + cacheReadsPrice: 0.0375, + cacheWritesPrice: 1.0, + maxThinkingTokens: 24_576, + supportsReasoningBudget: true, + requiredReasoningBudget: true, + }, + "gemini-2.5-flash-preview-05-20": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 0.6, + cacheReadsPrice: 0.0375, + cacheWritesPrice: 1.0, + }, + "gemini-2.5-pro-exp-03-25": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.5-pro-preview-03-25": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. 
+ outputPrice: 15, + cacheReadsPrice: 0.625, + cacheWritesPrice: 4.5, + tiers: [ + { + contextWindow: 200_000, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.31, + }, + { + contextWindow: Infinity, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.625, + }, + ], + }, + "gemini-2.5-pro-preview-05-06": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. + outputPrice: 15, + cacheReadsPrice: 0.625, + cacheWritesPrice: 4.5, + tiers: [ + { + contextWindow: 200_000, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.31, + }, + { + contextWindow: Infinity, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.625, + }, + ], + }, + "gemini-2.0-flash-001": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.1, + outputPrice: 0.4, + cacheReadsPrice: 0.025, + cacheWritesPrice: 1.0, + }, + "gemini-2.0-flash-lite-preview-02-05": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.0-pro-exp-02-05": { + maxTokens: 8192, + contextWindow: 2_097_152, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.0-flash-thinking-exp-01-21": { + maxTokens: 65_536, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.0-flash-thinking-exp-1219": { + maxTokens: 8192, + contextWindow: 32_767, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.0-flash-exp": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-1.5-flash-002": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, // This is the pricing for prompts above 128k tokens. 
+ outputPrice: 0.6, + cacheReadsPrice: 0.0375, + cacheWritesPrice: 1.0, + tiers: [ + { + contextWindow: 128_000, + inputPrice: 0.075, + outputPrice: 0.3, + cacheReadsPrice: 0.01875, + }, + { + contextWindow: Infinity, + inputPrice: 0.15, + outputPrice: 0.6, + cacheReadsPrice: 0.0375, + }, + ], + }, + "gemini-1.5-flash-exp-0827": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-1.5-flash-8b-exp-0827": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-1.5-pro-002": { + maxTokens: 8192, + contextWindow: 2_097_152, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-1.5-pro-exp-0827": { + maxTokens: 8192, + contextWindow: 2_097_152, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-exp-1206": { + maxTokens: 8192, + contextWindow: 2_097_152, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, +} as const satisfies Record<string, ModelInfo> diff --git a/packages/types/src/providers/glama.ts b/packages/types/src/providers/glama.ts new file mode 100644 index 0000000000..ea05d2c47f --- /dev/null +++ b/packages/types/src/providers/glama.ts @@ -0,0 +1,20 @@ +import type { ModelInfo } from "../model.js" + +// https://glama.ai/models +export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet" + +export const glamaDefaultModelInfo: ModelInfo = { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: + "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. 
Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", +} + +export const GLAMA_DEFAULT_TEMPERATURE = 0 diff --git a/packages/types/src/providers/groq.ts b/packages/types/src/providers/groq.ts new file mode 100644 index 0000000000..c48ee0e95d --- /dev/null +++ b/packages/types/src/providers/groq.ts @@ -0,0 +1,80 @@ +import type { ModelInfo } from "../model.js" + +// https://console.groq.com/docs/models +export type GroqModelId = + | "llama-3.1-8b-instant" + | "llama-3.3-70b-versatile" + | "meta-llama/llama-4-scout-17b-16e-instruct" + | "meta-llama/llama-4-maverick-17b-128e-instruct" + | "mistral-saba-24b" + | "qwen-qwq-32b" + | "deepseek-r1-distill-llama-70b" + +export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" // Defaulting to Llama 3.3 70B Versatile + +export const groqModels = { + // Models based on API response: https://api.groq.com/openai/v1/models + "llama-3.1-8b-instant": { + maxTokens: 131072, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Meta Llama 3.1 8B Instant model, 128K context.", + }, + "llama-3.3-70b-versatile": { + maxTokens: 32768, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Meta Llama 3.3 70B Versatile model, 128K context.", + }, + "meta-llama/llama-4-scout-17b-16e-instruct": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Meta Llama 4 Scout 17B Instruct model, 128K context.", + }, + "meta-llama/llama-4-maverick-17b-128e-instruct": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.", + }, + "mistral-saba-24b": { + maxTokens: 32768, + contextWindow: 32768, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Mistral Saba 24B model, 32K context.", + }, + "qwen-qwq-32b": { + maxTokens: 131072, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Alibaba Qwen QwQ 32B model, 128K context.", + }, + "deepseek-r1-distill-llama-70b": { + maxTokens: 131072, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek R1 Distill Llama 70B model, 128K context.", + }, +} as const satisfies Record<string, ModelInfo> diff --git a/packages/types/src/providers/index.ts b/packages/types/src/providers/index.ts new file mode 100644 index 0000000000..5f1c08041f --- /dev/null +++ b/packages/types/src/providers/index.ts @@ -0,0 +1,17 @@ +export * from "./anthropic.js" +export * from "./bedrock.js" +export * from "./chutes.js" +export * from "./deepseek.js" +export * from "./gemini.js" +export * from "./glama.js" +export * from "./groq.js" +export * from "./lite-llm.js" +export * from "./lm-studio.js" +export * from "./mistral.js" +export * from "./openai.js" +export * from "./openrouter.js" +export * from "./requesty.js" +export * from "./unbound.js" +export * from "./vertex.js" +export * from "./vscode-llm.js" +export * from "./xai.js" diff --git a/packages/types/src/providers/lite-llm.ts b/packages/types/src/providers/lite-llm.ts new file mode 100644 index 0000000000..303aa2b298 --- /dev/null +++ 
b/packages/types/src/providers/lite-llm.ts @@ -0,0 +1,48 @@ +import type { ModelInfo } from "../model.js" + +// https://docs.litellm.ai/ +export const litellmDefaultModelId = "claude-3-7-sonnet-20250219" + +export const litellmDefaultModelInfo: ModelInfo = { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, +} + +export const LITELLM_COMPUTER_USE_MODELS = new Set([ + "claude-3-5-sonnet-latest", + "claude-opus-4-20250514", + "claude-sonnet-4-20250514", + "claude-3-7-sonnet-latest", + "claude-3-7-sonnet-20250219", + "claude-3-5-sonnet-20241022", + "vertex_ai/claude-3-5-sonnet", + "vertex_ai/claude-3-5-sonnet-v2", + "vertex_ai/claude-3-5-sonnet-v2@20241022", + "vertex_ai/claude-3-7-sonnet@20250219", + "vertex_ai/claude-opus-4@20250514", + "vertex_ai/claude-sonnet-4@20250514", + "openrouter/anthropic/claude-3.5-sonnet", + "openrouter/anthropic/claude-3.5-sonnet:beta", + "openrouter/anthropic/claude-3.7-sonnet", + "openrouter/anthropic/claude-3.7-sonnet:beta", + "anthropic.claude-opus-4-20250514-v1:0", + "anthropic.claude-sonnet-4-20250514-v1:0", + "anthropic.claude-3-7-sonnet-20250219-v1:0", + "anthropic.claude-3-5-sonnet-20241022-v2:0", + "us.anthropic.claude-3-5-sonnet-20241022-v2:0", + "us.anthropic.claude-3-7-sonnet-20250219-v1:0", + "us.anthropic.claude-opus-4-20250514-v1:0", + "us.anthropic.claude-sonnet-4-20250514-v1:0", + "eu.anthropic.claude-3-5-sonnet-20241022-v2:0", + "eu.anthropic.claude-3-7-sonnet-20250219-v1:0", + "eu.anthropic.claude-opus-4-20250514-v1:0", + "eu.anthropic.claude-sonnet-4-20250514-v1:0", + "snowflake/claude-3-5-sonnet", +]) diff --git a/packages/types/src/providers/lm-studio.ts b/packages/types/src/providers/lm-studio.ts new file mode 100644 index 0000000000..f83bbc1039 --- /dev/null +++ b/packages/types/src/providers/lm-studio.ts @@ -0,0 +1 @@ +export const LMSTUDIO_DEFAULT_TEMPERATURE = 0 diff --git a/packages/types/src/providers/mistral.ts b/packages/types/src/providers/mistral.ts new file mode 100644 index 0000000000..acbe6d4ec7 --- /dev/null +++ b/packages/types/src/providers/mistral.ts @@ -0,0 +1,59 @@ +import type { ModelInfo } from "../model.js" + +// https://docs.mistral.ai/getting-started/models/models_overview/ +export type MistralModelId = keyof typeof mistralModels + +export const mistralDefaultModelId: MistralModelId = "codestral-latest" + +export const mistralModels = { + "codestral-latest": { + maxTokens: 256_000, + contextWindow: 256_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.3, + outputPrice: 0.9, + }, + "mistral-large-latest": { + maxTokens: 131_000, + contextWindow: 131_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 6.0, + }, + "ministral-8b-latest": { + maxTokens: 131_000, + contextWindow: 131_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.1, + outputPrice: 0.1, + }, + "ministral-3b-latest": { + maxTokens: 131_000, + contextWindow: 131_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.04, + outputPrice: 0.04, + }, + "mistral-small-latest": { + maxTokens: 32_000, + contextWindow: 32_000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.2, + outputPrice: 0.6, + }, + "pixtral-large-latest": { + maxTokens: 131_000, + contextWindow: 131_000, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 
6.0, + }, +} as const satisfies Record<string, ModelInfo> + +export const MISTRAL_DEFAULT_TEMPERATURE = 0 diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts new file mode 100644 index 0000000000..e303c179fc --- /dev/null +++ b/packages/types/src/providers/openai.ts @@ -0,0 +1,200 @@ +import type { ModelInfo } from "../model.js" + +// https://openai.com/api/pricing/ +export type OpenAiNativeModelId = keyof typeof openAiNativeModels + +export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4.1" + +export const openAiNativeModels = { + "gpt-4.1": { + maxTokens: 32_768, + contextWindow: 1_047_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2, + outputPrice: 8, + cacheReadsPrice: 0.5, + }, + "gpt-4.1-mini": { + maxTokens: 32_768, + contextWindow: 1_047_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.4, + outputPrice: 1.6, + cacheReadsPrice: 0.1, + }, + "gpt-4.1-nano": { + maxTokens: 32_768, + contextWindow: 1_047_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.1, + outputPrice: 0.4, + cacheReadsPrice: 0.025, + }, + o3: { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 10.0, + outputPrice: 40.0, + cacheReadsPrice: 2.5, + supportsReasoningEffort: true, + reasoningEffort: "medium", + }, + "o3-high": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 10.0, + outputPrice: 40.0, + cacheReadsPrice: 2.5, + reasoningEffort: "high", + }, + "o3-low": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 10.0, + outputPrice: 40.0, + cacheReadsPrice: 2.5, + reasoningEffort: "low", + }, + "o4-mini": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.275, + supportsReasoningEffort: true, + reasoningEffort: "medium", + }, + "o4-mini-high": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.275, + reasoningEffort: "high", + }, + "o4-mini-low": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.275, + reasoningEffort: "low", + }, + "o3-mini": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.55, + supportsReasoningEffort: true, + reasoningEffort: "medium", + }, + "o3-mini-high": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.55, + reasoningEffort: "high", + }, + "o3-mini-low": { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.55, + reasoningEffort: "low", + }, + o1: { + maxTokens: 100_000, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 15, + outputPrice: 60, + cacheReadsPrice: 7.5, + }, + "o1-preview": { + maxTokens: 32_768, + contextWindow: 128_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 15, + outputPrice: 60, + cacheReadsPrice: 7.5, + }, + "o1-mini": { + maxTokens: 
65_536, + contextWindow: 128_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.1, + outputPrice: 4.4, + cacheReadsPrice: 0.55, + }, + "gpt-4.5-preview": { + maxTokens: 16_384, + contextWindow: 128_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 75, + outputPrice: 150, + cacheReadsPrice: 37.5, + }, + "gpt-4o": { + maxTokens: 16_384, + contextWindow: 128_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, + outputPrice: 10, + cacheReadsPrice: 1.25, + }, + "gpt-4o-mini": { + maxTokens: 16_384, + contextWindow: 128_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 0.6, + cacheReadsPrice: 0.075, + }, +} as const satisfies Record<string, ModelInfo> + +export const openAiModelInfoSaneDefaults: ModelInfo = { + maxTokens: -1, + contextWindow: 128_000, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, +} + +// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation +// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs +export const azureOpenAiDefaultApiVersion = "2024-08-01-preview" + +export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0 + +export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions" diff --git a/packages/types/src/providers/openrouter.ts b/packages/types/src/providers/openrouter.ts new file mode 100644 index 0000000000..5d6edd844c --- /dev/null +++ b/packages/types/src/providers/openrouter.ts @@ -0,0 +1,75 @@ +import type { ModelInfo } from "../model.js" + +// https://openrouter.ai/models?order=newest&supported_parameters=tools +export const openRouterDefaultModelId = "anthropic/claude-sonnet-4" + +export const openRouterDefaultModelInfo: ModelInfo = { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: + "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. 
Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", +} + +export const OPENROUTER_DEFAULT_PROVIDER_NAME = "[default]" + +export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([ + "anthropic/claude-3-haiku", + "anthropic/claude-3-haiku:beta", + "anthropic/claude-3-opus", + "anthropic/claude-3-opus:beta", + "anthropic/claude-3-sonnet", + "anthropic/claude-3-sonnet:beta", + "anthropic/claude-3.5-haiku", + "anthropic/claude-3.5-haiku-20241022", + "anthropic/claude-3.5-haiku-20241022:beta", + "anthropic/claude-3.5-haiku:beta", + "anthropic/claude-3.5-sonnet", + "anthropic/claude-3.5-sonnet-20240620", + "anthropic/claude-3.5-sonnet-20240620:beta", + "anthropic/claude-3.5-sonnet:beta", + "anthropic/claude-3.7-sonnet", + "anthropic/claude-3.7-sonnet:beta", + "anthropic/claude-3.7-sonnet:thinking", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", + "google/gemini-2.5-pro-preview", + "google/gemini-2.5-flash-preview", + "google/gemini-2.5-flash-preview:thinking", + "google/gemini-2.5-flash-preview-05-20", + "google/gemini-2.5-flash-preview-05-20:thinking", + "google/gemini-2.0-flash-001", + "google/gemini-flash-1.5", + "google/gemini-flash-1.5-8b", +]) + +// https://www.anthropic.com/news/3-5-models-and-computer-use +export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([ + "anthropic/claude-3.5-sonnet", + "anthropic/claude-3.5-sonnet:beta", + "anthropic/claude-3.7-sonnet", + "anthropic/claude-3.7-sonnet:beta", + "anthropic/claude-3.7-sonnet:thinking", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", +]) + +export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([ + "anthropic/claude-3.7-sonnet:beta", + "anthropic/claude-3.7-sonnet:thinking", + "anthropic/claude-opus-4", + "anthropic/claude-sonnet-4", + "google/gemini-2.5-flash-preview-05-20", + "google/gemini-2.5-flash-preview-05-20:thinking", +]) + +export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([ + "anthropic/claude-3.7-sonnet:thinking", + "google/gemini-2.5-flash-preview-05-20:thinking", +]) diff --git a/packages/types/src/providers/requesty.ts b/packages/types/src/providers/requesty.ts new file mode 100644 index 0000000000..8bc7d720d5 --- /dev/null +++ b/packages/types/src/providers/requesty.ts @@ -0,0 +1,19 @@ +import type { ModelInfo } from "../model.js" + +// Requesty +// https://requesty.ai/router-2 +export const requestyDefaultModelId = "coding/claude-4-sonnet" + +export const requestyDefaultModelInfo: ModelInfo = { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: + "The best coding model, optimized by Requesty, and automatically routed to the fastest provider. 
Claude 4 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.", +} diff --git a/packages/types/src/providers/unbound.ts b/packages/types/src/providers/unbound.ts new file mode 100644 index 0000000000..cc73f420d1 --- /dev/null +++ b/packages/types/src/providers/unbound.ts @@ -0,0 +1,14 @@ +import type { ModelInfo } from "../model.js" + +export const unboundDefaultModelId = "anthropic/claude-3-7-sonnet-20250219" + +export const unboundDefaultModelInfo: ModelInfo = { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, +} diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts new file mode 100644 index 0000000000..11aa1aaa4a --- /dev/null +++ b/packages/types/src/providers/vertex.ts @@ -0,0 +1,225 @@ +import type { ModelInfo } from "../model.js" + +// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude +export type VertexModelId = keyof typeof vertexModels + +export const vertexDefaultModelId: VertexModelId = "claude-sonnet-4@20250514" + +export const vertexModels = { + "gemini-2.5-flash-preview-05-20:thinking": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 3.5, + maxThinkingTokens: 24_576, + supportsReasoningBudget: true, + requiredReasoningBudget: true, + }, + "gemini-2.5-flash-preview-05-20": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 0.6, + }, + "gemini-2.5-flash-preview-04-17:thinking": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0.15, + outputPrice: 3.5, + maxThinkingTokens: 24_576, + supportsReasoningBudget: true, + requiredReasoningBudget: true, + }, + "gemini-2.5-flash-preview-04-17": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0.15, + outputPrice: 0.6, + }, + "gemini-2.5-pro-preview-03-25": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, + outputPrice: 15, + }, + "gemini-2.5-pro-preview-05-06": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, + outputPrice: 15, + }, + "gemini-2.5-pro-exp-03-25": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.0-pro-exp-02-05": { + maxTokens: 8192, + contextWindow: 2_097_152, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-2.0-flash-001": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 0.6, + }, + "gemini-2.0-flash-lite-001": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0.075, + outputPrice: 0.3, + }, + "gemini-2.0-flash-thinking-exp-01-21": { + maxTokens: 8192, + contextWindow: 32_768, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + }, + "gemini-1.5-flash-002": { + maxTokens: 8192, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: 
true, + inputPrice: 0.075, + outputPrice: 0.3, + }, + "gemini-1.5-pro-002": { + maxTokens: 8192, + contextWindow: 2_097_152, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 1.25, + outputPrice: 5, + }, + "claude-sonnet-4@20250514": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + supportsReasoningBudget: true, + }, + "claude-opus-4@20250514": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 15.0, + outputPrice: 75.0, + cacheWritesPrice: 18.75, + cacheReadsPrice: 1.5, + }, + "claude-3-7-sonnet@20250219:thinking": { + maxTokens: 64_000, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + supportsReasoningBudget: true, + requiredReasoningBudget: true, + }, + "claude-3-7-sonnet@20250219": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + }, + "claude-3-5-sonnet-v2@20241022": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsComputerUse: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + }, + "claude-3-5-sonnet@20240620": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3.0, + outputPrice: 15.0, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + }, + "claude-3-5-haiku@20241022": { + maxTokens: 8192, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.0, + outputPrice: 5.0, + cacheWritesPrice: 1.25, + cacheReadsPrice: 0.1, + }, + "claude-3-opus@20240229": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 15.0, + outputPrice: 75.0, + cacheWritesPrice: 18.75, + cacheReadsPrice: 1.5, + }, + "claude-3-haiku@20240307": { + maxTokens: 4096, + contextWindow: 200_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.25, + outputPrice: 1.25, + cacheWritesPrice: 0.3, + cacheReadsPrice: 0.03, + }, +} as const satisfies Record<string, ModelInfo> + +export const VERTEX_REGIONS = [ + { value: "us-east5", label: "us-east5" }, + { value: "us-central1", label: "us-central1" }, + { value: "europe-west1", label: "europe-west1" }, + { value: "europe-west4", label: "europe-west4" }, + { value: "asia-southeast1", label: "asia-southeast1" }, +] diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts new file mode 100644 index 0000000000..bf38cb814b --- /dev/null +++ b/packages/types/src/providers/vscode-llm.ts @@ -0,0 +1,161 @@ +import type { ModelInfo } from "../model.js" + +export type VscodeLlmModelId = keyof typeof vscodeLlmModels + +export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet" + +export const vscodeLlmModels = { + "gpt-3.5-turbo": { + contextWindow: 12114, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-3.5-turbo", + version: "gpt-3.5-turbo-0613", + name: "GPT 3.5 Turbo", + supportsToolCalling: true, + 
maxInputTokens: 12114, + }, + "gpt-4o-mini": { + contextWindow: 12115, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-4o-mini", + version: "gpt-4o-mini-2024-07-18", + name: "GPT-4o mini", + supportsToolCalling: true, + maxInputTokens: 12115, + }, + "gpt-4": { + contextWindow: 28501, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-4", + version: "gpt-4-0613", + name: "GPT 4", + supportsToolCalling: true, + maxInputTokens: 28501, + }, + "gpt-4-0125-preview": { + contextWindow: 63826, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-4-turbo", + version: "gpt-4-0125-preview", + name: "GPT 4 Turbo", + supportsToolCalling: true, + maxInputTokens: 63826, + }, + "gpt-4o": { + contextWindow: 63827, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-4o", + version: "gpt-4o-2024-11-20", + name: "GPT-4o", + supportsToolCalling: true, + maxInputTokens: 63827, + }, + o1: { + contextWindow: 19827, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "o1-ga", + version: "o1-2024-12-17", + name: "o1 (Preview)", + supportsToolCalling: true, + maxInputTokens: 19827, + }, + "o3-mini": { + contextWindow: 63827, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "o3-mini", + version: "o3-mini-2025-01-31", + name: "o3-mini", + supportsToolCalling: true, + maxInputTokens: 63827, + }, + "claude-3.5-sonnet": { + contextWindow: 81638, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "claude-3.5-sonnet", + version: "claude-3.5-sonnet", + name: "Claude 3.5 Sonnet", + supportsToolCalling: true, + maxInputTokens: 81638, + }, + "gemini-2.0-flash-001": { + contextWindow: 127827, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gemini-2.0-flash", + version: "gemini-2.0-flash-001", + name: "Gemini 2.0 Flash", + supportsToolCalling: false, + maxInputTokens: 127827, + }, + "gemini-2.5-pro": { + contextWindow: 63830, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gemini-2.5-pro", + version: "gemini-2.5-pro-preview-03-25", + name: "Gemini 2.5 Pro (Preview)", + supportsToolCalling: true, + maxInputTokens: 63830, + }, + "o4-mini": { + contextWindow: 111446, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "o4-mini", + version: "o4-mini-2025-04-16", + name: "o4-mini (Preview)", + supportsToolCalling: true, + maxInputTokens: 111446, + }, + "gpt-4.1": { + contextWindow: 111446, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-4.1", + version: "gpt-4.1-2025-04-14", + name: "GPT-4.1 (Preview)", + supportsToolCalling: true, + maxInputTokens: 111446, + }, +} as const satisfies Record< + string, + ModelInfo & { + family: string + version: string + name: string + supportsToolCalling: boolean + maxInputTokens: number + } +> diff --git a/packages/types/src/providers/xai.ts b/packages/types/src/providers/xai.ts new file mode 100644 index 0000000000..ccb8549fcd --- /dev/null +++ b/packages/types/src/providers/xai.ts @@ -0,0 +1,157 @@ +import type { ModelInfo } from "../model.js" + +// https://docs.x.ai/docs/api-reference +export type XAIModelId = keyof typeof xaiModels + 
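// (Aside, not part of the diff: every provider module above repeats this `keyof typeof`
// pattern, which is what keeps model lookups compile-time safe. A minimal sketch of how
// a consumer might narrow an arbitrary string to the checked key union; `resolveXaiModel`
// is a hypothetical helper name, not an existing export.)
import { type XAIModelId, xaiDefaultModelId, xaiModels } from "@roo-code/types"

function resolveXaiModel(id?: string) {
	// `id in xaiModels` narrows the string; anything else falls back to the default id.
	const modelId: XAIModelId = id && id in xaiModels ? (id as XAIModelId) : xaiDefaultModelId
	return { id: modelId, info: xaiModels[modelId] }
}

resolveXaiModel("grok-3") // OK: checked key
resolveXaiModel("grok-99") // falls back to xaiDefaultModelId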
+export const xaiDefaultModelId: XAIModelId = "grok-3" + +export const xaiModels = { + "grok-3-beta": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 3.0, + outputPrice: 15.0, + description: "xAI's Grok-3 beta model with 131K context window", + }, + "grok-3-fast-beta": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 5.0, + outputPrice: 25.0, + description: "xAI's Grok-3 fast beta model with 131K context window", + }, + "grok-3-mini-beta": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.3, + outputPrice: 0.5, + description: "xAI's Grok-3 mini beta model with 131K context window", + supportsReasoningEffort: true, + }, + "grok-3-mini-fast-beta": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.6, + outputPrice: 4.0, + description: "xAI's Grok-3 mini fast beta model with 131K context window", + supportsReasoningEffort: true, + }, + "grok-3": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 3.0, + outputPrice: 15.0, + description: "xAI's Grok-3 model with 131K context window", + }, + "grok-3-fast": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 5.0, + outputPrice: 25.0, + description: "xAI's Grok-3 fast model with 131K context window", + }, + "grok-3-mini": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.3, + outputPrice: 0.5, + description: "xAI's Grok-3 mini model with 131K context window", + supportsReasoningEffort: true, + }, + "grok-3-mini-fast": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.6, + outputPrice: 4.0, + description: "xAI's Grok-3 mini fast model with 131K context window", + supportsReasoningEffort: true, + }, + "grok-2-latest": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 10.0, + description: "xAI's Grok-2 model - latest version with 131K context window", + }, + "grok-2": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 10.0, + description: "xAI's Grok-2 model with 131K context window", + }, + "grok-2-1212": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 10.0, + description: "xAI's Grok-2 model (version 1212) with 131K context window", + }, + "grok-2-vision-latest": { + maxTokens: 8192, + contextWindow: 32768, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 10.0, + description: "xAI's Grok-2 Vision model - latest version with image support and 32K context window", + }, + "grok-2-vision": { + maxTokens: 8192, + contextWindow: 32768, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 10.0, + description: "xAI's Grok-2 Vision model with image support and 32K context window", + }, + "grok-2-vision-1212": { + maxTokens: 8192, + contextWindow: 32768, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 2.0, + outputPrice: 10.0, + description: "xAI's Grok-2 Vision model (version 1212) with image 
support and 32K context window", + }, + "grok-vision-beta": { + maxTokens: 8192, + contextWindow: 8192, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 5.0, + outputPrice: 15.0, + description: "xAI's Grok Vision Beta model with image support and 8K context window", + }, + "grok-beta": { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 5.0, + outputPrice: 15.0, + description: "xAI's Grok Beta model (legacy) with 131K context window", + }, +} as const satisfies Record diff --git a/src/api/providers/__tests__/chutes.test.ts b/src/api/providers/__tests__/chutes.test.ts index 63af600be7..9ee8b8f995 100644 --- a/src/api/providers/__tests__/chutes.test.ts +++ b/src/api/providers/__tests__/chutes.test.ts @@ -3,7 +3,7 @@ import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" -import { ChutesModelId, chutesDefaultModelId, chutesModels } from "../../../shared/api" +import { type ChutesModelId, chutesDefaultModelId, chutesModels } from "@roo-code/types" import { ChutesHandler } from "../chutes" diff --git a/src/api/providers/__tests__/deepseek.test.ts b/src/api/providers/__tests__/deepseek.test.ts index 0fc7509296..6f795d64ca 100644 --- a/src/api/providers/__tests__/deepseek.test.ts +++ b/src/api/providers/__tests__/deepseek.test.ts @@ -1,9 +1,12 @@ -import { DeepSeekHandler } from "../deepseek" -import { ApiHandlerOptions, deepSeekDefaultModelId } from "../../../shared/api" import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" -// Mock OpenAI client +import { deepSeekDefaultModelId } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../../shared/api" + +import { DeepSeekHandler } from "../deepseek" + const mockCreate = jest.fn() jest.mock("openai", () => { return { diff --git a/src/api/providers/__tests__/gemini.test.ts b/src/api/providers/__tests__/gemini.test.ts index 3016e77364..837948af1d 100644 --- a/src/api/providers/__tests__/gemini.test.ts +++ b/src/api/providers/__tests__/gemini.test.ts @@ -2,9 +2,8 @@ import { Anthropic } from "@anthropic-ai/sdk" -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, geminiDefaultModelId } from "@roo-code/types" -import { geminiDefaultModelId } from "../../../shared/api" import { GeminiHandler } from "../gemini" const GEMINI_20_FLASH_THINKING_NAME = "gemini-2.0-flash-thinking-exp-1219" diff --git a/src/api/providers/__tests__/groq.test.ts b/src/api/providers/__tests__/groq.test.ts index 068f7248fd..1859d6c5c4 100644 --- a/src/api/providers/__tests__/groq.test.ts +++ b/src/api/providers/__tests__/groq.test.ts @@ -3,7 +3,7 @@ import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" -import { GroqModelId, groqDefaultModelId, groqModels } from "../../../shared/api" +import { type GroqModelId, groqDefaultModelId, groqModels } from "@roo-code/types" import { GroqHandler } from "../groq" diff --git a/src/api/providers/__tests__/lmstudio.test.ts b/src/api/providers/__tests__/lmstudio.test.ts index 8667b273d1..084a70665e 100644 --- a/src/api/providers/__tests__/lmstudio.test.ts +++ b/src/api/providers/__tests__/lmstudio.test.ts @@ -1,6 +1,6 @@ import { Anthropic } from "@anthropic-ai/sdk" -import { LmStudioHandler } from "../lmstudio" +import { LmStudioHandler } from "../lm-studio" import { ApiHandlerOptions } from "../../../shared/api" // Mock OpenAI client diff --git a/src/api/providers/__tests__/xai.test.ts b/src/api/providers/__tests__/xai.test.ts index f17e75277c..41adc5fb32 100644 
--- a/src/api/providers/__tests__/xai.test.ts +++ b/src/api/providers/__tests__/xai.test.ts @@ -1,9 +1,10 @@ -import { XAIHandler } from "../xai" -import { xaiDefaultModelId, xaiModels } from "../../../shared/api" import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" -// Mock OpenAI client +import { xaiDefaultModelId, xaiModels } from "@roo-code/types" + +import { XAIHandler } from "../xai" + jest.mock("openai", () => { const createMock = jest.fn() return jest.fn(() => ({ diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index 0ad262593b..c70a15926d 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -2,16 +2,21 @@ import { Anthropic } from "@anthropic-ai/sdk" import { AnthropicVertex } from "@anthropic-ai/vertex-sdk" import { GoogleAuth, JWTInput } from "google-auth-library" -import type { ModelInfo } from "@roo-code/types" - -import { ApiHandlerOptions, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api" +import { + type ModelInfo, + type VertexModelId, + vertexDefaultModelId, + vertexModels, + ANTHROPIC_DEFAULT_MAX_TOKENS, +} from "@roo-code/types" + +import { ApiHandlerOptions } from "../../shared/api" import { safeJsonParse } from "../../shared/safeJsonParse" import { ApiStream } from "../transform/stream" import { addCacheBreakpoints } from "../transform/caching/vertex" import { getModelParams } from "../transform/model-params" -import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index d0c4c7c9d3..412f5de621 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -2,14 +2,19 @@ import { Anthropic } from "@anthropic-ai/sdk" import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming" import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources" -import type { ModelInfo } from "@roo-code/types" +import { + type ModelInfo, + type AnthropicModelId, + anthropicDefaultModelId, + anthropicModels, + ANTHROPIC_DEFAULT_MAX_TOKENS, +} from "@roo-code/types" -import { anthropicDefaultModelId, AnthropicModelId, anthropicModels, ApiHandlerOptions } from "../../shared/api" +import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" -import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index 6b77521ea4..1abbf5f558 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -15,6 +15,7 @@ export abstract class BaseProvider implements ApiHandler { messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream + abstract getModel(): { id: string; info: ModelInfo } /** diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index cc5de1b548..670371c02e 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -10,26 +10,26 @@ import { import { fromIni } from "@aws-sdk/credential-providers" import { Anthropic } from "@anthropic-ai/sdk" -import type { ModelInfo, ProviderSettings } 
from "@roo-code/types" - import { - BedrockModelId, + type ModelInfo, + type ProviderSettings, + type BedrockModelId, bedrockDefaultModelId, bedrockModels, bedrockDefaultPromptRouterModelId, -} from "../../shared/api" + BEDROCK_DEFAULT_TEMPERATURE, + BEDROCK_MAX_TOKENS, + BEDROCK_REGION_INFO, +} from "@roo-code/types" + import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" import { logger } from "../../utils/logging" import { MultiPointStrategy } from "../transform/cache-strategy/multi-point-strategy" import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types" -import { AMAZON_BEDROCK_REGION_INFO } from "../../shared/aws_regions" import { convertToBedrockConverseMessages as sharedConverter } from "../transform/bedrock-converse-format" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -const BEDROCK_DEFAULT_TEMPERATURE = 0.3 -const BEDROCK_MAX_TOKENS = 4096 - /************************************************************************************ * * TYPES @@ -729,11 +729,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH *************************************************************************************/ private static getPrefixList(): string[] { - return Object.keys(AMAZON_BEDROCK_REGION_INFO) + return Object.keys(BEDROCK_REGION_INFO) } private static getPrefixForRegion(region: string): string | undefined { - for (const [prefix, info] of Object.entries(AMAZON_BEDROCK_REGION_INFO)) { + for (const [prefix, info] of Object.entries(BEDROCK_REGION_INFO)) { if (info.pattern && region.startsWith(info.pattern)) { return prefix } @@ -742,7 +742,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH } private static prefixIsMultiRegion(arnPrefix: string): boolean { - for (const [prefix, info] of Object.entries(AMAZON_BEDROCK_REGION_INFO)) { + for (const [prefix, info] of Object.entries(BEDROCK_REGION_INFO)) { if (arnPrefix === prefix) { if (info?.multiRegion) return info.multiRegion else return false diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts index 6f7481f180..0fa8741fa3 100644 --- a/src/api/providers/chutes.ts +++ b/src/api/providers/chutes.ts @@ -1,4 +1,6 @@ -import { ApiHandlerOptions, ChutesModelId, chutesDefaultModelId, chutesModels } from "../../shared/api" +import { type ChutesModelId, chutesDefaultModelId, chutesModels } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" diff --git a/src/api/providers/constants.ts b/src/api/providers/constants.ts index 4d6c4672e5..e7c4398324 100644 --- a/src/api/providers/constants.ts +++ b/src/api/providers/constants.ts @@ -2,7 +2,3 @@ export const DEFAULT_HEADERS = { "HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline", "X-Title": "Roo Code", } - -export const ANTHROPIC_DEFAULT_MAX_TOKENS = 8192 - -export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6 diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 47b780d262..de119de6db 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -1,4 +1,5 @@ -import { deepSeekModels, deepSeekDefaultModelId } from "../../shared/api" +import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types" + import type { ApiHandlerOptions } from "../../shared/api" import type { ApiStreamUsageChunk } from "../transform/stream" diff --git 
a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts index 5e6ced1b33..010a8a9fa2 100644 --- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts +++ b/src/api/providers/fetchers/__tests__/openrouter.spec.ts @@ -9,7 +9,7 @@ import { OPEN_ROUTER_COMPUTER_USE_MODELS, OPEN_ROUTER_REASONING_BUDGET_MODELS, OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS, -} from "../../../../shared/api" +} from "@roo-code/types" import { getOpenRouterModelEndpoints, getOpenRouterModels } from "../openrouter" diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts index 093fd85888..34f6a111d2 100644 --- a/src/api/providers/fetchers/litellm.ts +++ b/src/api/providers/fetchers/litellm.ts @@ -1,6 +1,8 @@ import axios from "axios" -import { LITELLM_COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api" +import { LITELLM_COMPUTER_USE_MODELS } from "@roo-code/types" + +import type { ModelRecord } from "../../../shared/api" /** * Fetches available models from a LiteLLM server diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts index b410d06fc0..a98484ba0e 100644 --- a/src/api/providers/fetchers/openrouter.ts +++ b/src/api/providers/fetchers/openrouter.ts @@ -1,15 +1,16 @@ import axios from "axios" import { z } from "zod" -import { type ModelInfo, isModelParameter } from "@roo-code/types" - import { - ApiHandlerOptions, + type ModelInfo, + isModelParameter, OPEN_ROUTER_COMPUTER_USE_MODELS, OPEN_ROUTER_REASONING_BUDGET_MODELS, OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS, anthropicModels, -} from "../../../shared/api" +} from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../../shared/api" import { parseApiPrice } from "../../../shared/cost" /** diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index e5ceffbf43..5addc07a92 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -7,9 +7,9 @@ import { } from "@google/genai" import type { JWTInput } from "google-auth-library" -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, type GeminiModelId, geminiDefaultModelId, geminiModels } from "@roo-code/types" -import { ApiHandlerOptions, GeminiModelId, geminiDefaultModelId, geminiModels } from "../../shared/api" +import type { ApiHandlerOptions } from "../../shared/api" import { safeJsonParse } from "../../shared/safeJsonParse" import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format" diff --git a/src/api/providers/glama.ts b/src/api/providers/glama.ts index db2a3f84b6..774d615709 100644 --- a/src/api/providers/glama.ts +++ b/src/api/providers/glama.ts @@ -2,8 +2,10 @@ import { Anthropic } from "@anthropic-ai/sdk" import axios from "axios" import OpenAI from "openai" +import { glamaDefaultModelId, glamaDefaultModelInfo, GLAMA_DEFAULT_TEMPERATURE } from "@roo-code/types" + import { Package } from "../../shared/package" -import { ApiHandlerOptions, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api" +import { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" @@ -12,8 +14,6 @@ import { addCacheBreakpoints } from "../transform/caching/anthropic" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" -const GLAMA_DEFAULT_TEMPERATURE = 0 - 
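// Editor's note (illustrative, not part of the patch): provider-local defaults
// such as GLAMA_DEFAULT_TEMPERATURE now live in @roo-code/types and are pulled
// in through the import added above, so every consumer shares one definition.
// A hedged sketch of the intended call-site usage, assuming an options object
// with an optional modelTemperature field:
//
//   const temperature = this.options.modelTemperature ?? GLAMA_DEFAULT_TEMPERATURE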
const DEFAULT_HEADERS = { "X-Glama-Metadata": JSON.stringify({ labels: [{ key: "app", value: `vscode.${Package.publisher}.${Package.name}` }], diff --git a/src/api/providers/groq.ts b/src/api/providers/groq.ts index 2f4e763b8e..7583edc51c 100644 --- a/src/api/providers/groq.ts +++ b/src/api/providers/groq.ts @@ -1,4 +1,6 @@ -import { ApiHandlerOptions, GroqModelId, groqDefaultModelId, groqModels } from "../../shared/api" // Updated imports for Groq +import { type GroqModelId, groqDefaultModelId, groqModels } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" diff --git a/src/api/providers/index.ts b/src/api/providers/index.ts index dd2a65dd75..b305118188 100644 --- a/src/api/providers/index.ts +++ b/src/api/providers/index.ts @@ -1,22 +1,22 @@ -export { GlamaHandler } from "./glama" +export { AnthropicVertexHandler } from "./anthropic-vertex" export { AnthropicHandler } from "./anthropic" export { AwsBedrockHandler } from "./bedrock" -export { OpenRouterHandler } from "./openrouter" -export { VertexHandler } from "./vertex" -export { AnthropicVertexHandler } from "./anthropic-vertex" -export { OpenAiHandler } from "./openai" -export { OllamaHandler } from "./ollama" -export { LmStudioHandler } from "./lmstudio" -export { GeminiHandler } from "./gemini" -export { OpenAiNativeHandler } from "./openai-native" +export { ChutesHandler } from "./chutes" export { DeepSeekHandler } from "./deepseek" +export { FakeAIHandler } from "./fake-ai" +export { GeminiHandler } from "./gemini" +export { GlamaHandler } from "./glama" +export { GroqHandler } from "./groq" +export { HumanRelayHandler } from "./human-relay" +export { LiteLLMHandler } from "./lite-llm" +export { LmStudioHandler } from "./lm-studio" export { MistralHandler } from "./mistral" -export { VsCodeLmHandler } from "./vscode-lm" -export { UnboundHandler } from "./unbound" +export { OllamaHandler } from "./ollama" +export { OpenAiNativeHandler } from "./openai-native" +export { OpenAiHandler } from "./openai" +export { OpenRouterHandler } from "./openrouter" export { RequestyHandler } from "./requesty" -export { HumanRelayHandler } from "./human-relay" -export { FakeAIHandler } from "./fake-ai" +export { UnboundHandler } from "./unbound" +export { VertexHandler } from "./vertex" +export { VsCodeLmHandler } from "./vscode-lm" export { XAIHandler } from "./xai" -export { GroqHandler } from "./groq" -export { ChutesHandler } from "./chutes" -export { LiteLLMHandler } from "./litellm" diff --git a/src/api/providers/litellm.ts b/src/api/providers/lite-llm.ts similarity index 96% rename from src/api/providers/litellm.ts rename to src/api/providers/lite-llm.ts index fc29f2c5f8..002040df2a 100644 --- a/src/api/providers/litellm.ts +++ b/src/api/providers/lite-llm.ts @@ -1,9 +1,13 @@ import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only -import { ApiHandlerOptions, litellmDefaultModelId, litellmDefaultModelInfo } from "../../shared/api" +import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types" + +import { ApiHandlerOptions } from "../../shared/api" + import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" + import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" diff --git 
a/src/api/providers/lmstudio.ts b/src/api/providers/lm-studio.ts similarity index 96% rename from src/api/providers/lmstudio.ts rename to src/api/providers/lm-studio.ts index bac6b05551..f032e2d560 100644 --- a/src/api/providers/lmstudio.ts +++ b/src/api/providers/lm-studio.ts @@ -2,9 +2,10 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" import axios from "axios" -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, openAiModelInfoSaneDefaults, LMSTUDIO_DEFAULT_TEMPERATURE } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" -import { ApiHandlerOptions, openAiModelInfoSaneDefaults } from "../../shared/api" import { XmlMatcher } from "../../utils/xml-matcher" import { convertToOpenAiMessages } from "../transform/openai-format" @@ -13,8 +14,6 @@ import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -const LMSTUDIO_DEFAULT_TEMPERATURE = 0 - export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index 5ed3140aff..7d48b9ef01 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -1,17 +1,16 @@ import { Anthropic } from "@anthropic-ai/sdk" import { Mistral } from "@mistralai/mistralai" -import type { ModelInfo } from "@roo-code/types" +import { type MistralModelId, mistralDefaultModelId, mistralModels, MISTRAL_DEFAULT_TEMPERATURE } from "@roo-code/types" + +import { ApiHandlerOptions } from "../../shared/api" -import { ApiHandlerOptions, mistralDefaultModelId, MistralModelId, mistralModels } from "../../shared/api" import { convertToMistralMessages } from "../transform/mistral-format" import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -const MISTRAL_DEFAULT_TEMPERATURE = 0 - export class MistralHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: Mistral diff --git a/src/api/providers/ollama.ts b/src/api/providers/ollama.ts index 4a321895d0..7f384e9a98 100644 --- a/src/api/providers/ollama.ts +++ b/src/api/providers/ollama.ts @@ -2,20 +2,19 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" import axios from "axios" -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, openAiModelInfoSaneDefaults, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" -import { ApiHandlerOptions, openAiModelInfoSaneDefaults } from "../../shared/api" import { XmlMatcher } from "../../utils/xml-matcher" import { convertToOpenAiMessages } from "../transform/openai-format" import { convertToR1Format } from "../transform/r1-format" import { ApiStream } from "../transform/stream" -import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -// Alias for the usage object returned in streaming chunks type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"] export class OllamaHandler extends BaseProvider implements SingleCompletionHandler { diff --git 
a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 41a3a63ae7..3f14e65cc6 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -1,14 +1,15 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import type { ModelInfo } from "@roo-code/types" - import { - ApiHandlerOptions, + type ModelInfo, openAiNativeDefaultModelId, OpenAiNativeModelId, openAiNativeModels, -} from "../../shared/api" + OPENAI_NATIVE_DEFAULT_TEMPERATURE, +} from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" @@ -19,8 +20,6 @@ import { getModelParams } from "../transform/model-params" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0 - export type OpenAiNativeModel = ReturnType export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 3e7324f5d9..62aa4cc8a3 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -2,9 +2,15 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI, { AzureOpenAI } from "openai" import axios from "axios" -import type { ModelInfo } from "@roo-code/types" +import { + type ModelInfo, + azureOpenAiDefaultApiVersion, + openAiModelInfoSaneDefaults, + DEEP_SEEK_DEFAULT_TEMPERATURE, + OPENAI_AZURE_AI_INFERENCE_PATH, +} from "@roo-code/types" -import { ApiHandlerOptions, azureOpenAiDefaultApiVersion, openAiModelInfoSaneDefaults } from "../../shared/api" +import type { ApiHandlerOptions } from "../../shared/api" import { XmlMatcher } from "../../utils/xml-matcher" @@ -14,12 +20,10 @@ import { convertToSimpleMessages } from "../transform/simple-format" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" -import { DEFAULT_HEADERS, DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants" +import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -export const AZURE_AI_INFERENCE_PATH = "/models/chat/completions" - // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`. @@ -161,7 +165,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const stream = await this.client.chat.completions.create( requestOptions, - isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {}, + isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, ) const matcher = new XmlMatcher( @@ -220,7 +224,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const response = await this.client.chat.completions.create( requestOptions, - this._isAzureAiInference(modelUrl) ? { path: AZURE_AI_INFERENCE_PATH } : {}, + this._isAzureAiInference(modelUrl) ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, ) yield { @@ -260,7 +264,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const response = await this.client.chat.completions.create( requestOptions, - isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {}, + isAzureAiInference ? 
{ path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, ) return response.choices[0]?.message.content || "" @@ -297,7 +301,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), reasoning_effort: this.getModel().info.reasoningEffort, }, - methodIsAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {}, + methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, ) yield* this.handleStreamResponse(stream) @@ -317,7 +321,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const response = await this.client.chat.completions.create( requestOptions, - methodIsAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {}, + methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, ) yield { diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index e7a8139864..c0656735e7 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -2,12 +2,14 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" import { - ApiHandlerOptions, - ModelRecord, openRouterDefaultModelId, openRouterDefaultModelInfo, + OPENROUTER_DEFAULT_PROVIDER_NAME, OPEN_ROUTER_PROMPT_CACHING_MODELS, -} from "../../shared/api" + DEEP_SEEK_DEFAULT_TEMPERATURE, +} from "@roo-code/types" + +import type { ApiHandlerOptions, ModelRecord } from "../../shared/api" import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStreamChunk } from "../transform/stream" @@ -20,12 +22,10 @@ import { getModelParams } from "../transform/model-params" import { getModels } from "./fetchers/modelCache" import { getModelEndpoints } from "./fetchers/modelEndpointCache" -import { DEFAULT_HEADERS, DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants" +import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler } from "../index" -const OPENROUTER_DEFAULT_PROVIDER_NAME = "[default]" - // Add custom interface for OpenRouter params. 
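// Editor's note (illustrative, not part of the patch): the type alias defined
// just below extends OpenAI's standard chat-completion params with
// OpenRouter-specific request fields such as `transforms`. A hedged usage
// sketch; "middle-out" is OpenRouter's documented prompt-compression
// transform, and the model id and message content are placeholders:
const exampleParams: OpenRouterChatCompletionParams = {
	model: "anthropic/claude-sonnet-4",
	messages: [{ role: "user", content: "Hello" }],
	transforms: ["middle-out"],
}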
type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { transforms?: string[] diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 8317ad250c..8af0b9aa42 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -1,9 +1,9 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, requestyDefaultModelId, requestyDefaultModelInfo } from "@roo-code/types" -import { ApiHandlerOptions, ModelRecord, requestyDefaultModelId, requestyDefaultModelInfo } from "../../shared/api" +import type { ApiHandlerOptions, ModelRecord } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" import { convertToOpenAiMessages } from "../transform/openai-format" diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts index 2a41d5416c..2c7bd1e575 100644 --- a/src/api/providers/unbound.ts +++ b/src/api/providers/unbound.ts @@ -1,7 +1,9 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { ApiHandlerOptions, unboundDefaultModelId, unboundDefaultModelInfo } from "../../shared/api" +import { unboundDefaultModelId, unboundDefaultModelInfo } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" diff --git a/src/api/providers/vertex.ts b/src/api/providers/vertex.ts index 2bc940de7a..fdd51e0666 100644 --- a/src/api/providers/vertex.ts +++ b/src/api/providers/vertex.ts @@ -1,6 +1,6 @@ -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, type VertexModelId, vertexDefaultModelId, vertexModels } from "@roo-code/types" -import { ApiHandlerOptions, VertexModelId, vertexDefaultModelId, vertexModels } from "../../shared/api" +import type { ApiHandlerOptions } from "../../shared/api" import { GeminiHandler } from "./gemini" import { SingleCompletionHandler } from "../index" diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index 5990193ecb..6474371bee 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ -1,10 +1,10 @@ import { Anthropic } from "@anthropic-ai/sdk" import * as vscode from "vscode" -import type { ModelInfo } from "@roo-code/types" +import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types" +import type { ApiHandlerOptions } from "../../shared/api" import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils" -import { ApiHandlerOptions, openAiModelInfoSaneDefaults } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format" diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts index 280b6800f0..adcd0d92bf 100644 --- a/src/api/providers/xai.ts +++ b/src/api/providers/xai.ts @@ -1,7 +1,9 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { ApiHandlerOptions, XAIModelId, xaiDefaultModelId, xaiModels } from "../../shared/api" +import { type XAIModelId, xaiDefaultModelId, xaiModels } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" diff --git a/src/api/transform/__tests__/model-params.test.ts 
b/src/api/transform/__tests__/model-params.test.ts index 2eabe1c7fa..a1132e2886 100644 --- a/src/api/transform/__tests__/model-params.test.ts +++ b/src/api/transform/__tests__/model-params.test.ts @@ -1,8 +1,6 @@ // npx jest src/api/transform/__tests__/model-params.test.ts -import type { ModelInfo } from "@roo-code/types" - -import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../../providers/constants" +import { type ModelInfo, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types" import { getModelParams } from "../model-params" diff --git a/src/api/transform/bedrock-converse-format.ts b/src/api/transform/bedrock-converse-format.ts index 68d21e4d5b..1f53067c84 100644 --- a/src/api/transform/bedrock-converse-format.ts +++ b/src/api/transform/bedrock-converse-format.ts @@ -1,7 +1,26 @@ import { Anthropic } from "@anthropic-ai/sdk" import { ConversationRole, Message, ContentBlock } from "@aws-sdk/client-bedrock-runtime" -import { MessageContent } from "../../shared/api" +interface BedrockMessageContent { + type: "text" | "image" | "video" | "tool_use" | "tool_result" + text?: string + source?: { + type: "base64" + data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + } + // Video specific fields + format?: string + s3Location?: { + uri: string + bucketOwner?: string + } + // Tool use and result fields + toolUseId?: string + name?: string + input?: any + output?: any // Used for tool_result type +} /** * Convert Anthropic messages to Bedrock Converse format @@ -24,7 +43,7 @@ export function convertToBedrockConverseMessages(anthropicMessages: Anthropic.Me // Process complex content types const content = anthropicMessage.content.map((block) => { - const messageBlock = block as MessageContent & { + const messageBlock = block as BedrockMessageContent & { id?: string tool_use_id?: string content?: Array<{ type: string; text: string }> diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts index 2fb5012655..d9a2c749ca 100644 --- a/src/api/transform/model-params.ts +++ b/src/api/transform/model-params.ts @@ -1,6 +1,5 @@ -import type { ModelInfo, ProviderSettings } from "@roo-code/types" +import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types" -import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../providers/constants" import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared/api" import { diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 4d43c4ff9d..62a7d8046e 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -22,8 +22,11 @@ import { type TerminalActionId, type TerminalActionPromptType, type HistoryItem, + type CloudUserInfo, + requestyDefaultModelId, + openRouterDefaultModelId, + glamaDefaultModelId, ORGANIZATION_ALLOW_ALL, - CloudUserInfo, } from "@roo-code/types" import { TelemetryService } from "@roo-code/telemetry" import { CloudService } from "@roo-code/cloud" @@ -31,7 +34,6 @@ import { CloudService } from "@roo-code/cloud" import { t } from "../../i18n" import { setPanel } from "../../activate/registerCommands" import { Package } from "../../shared/package" -import { requestyDefaultModelId, openRouterDefaultModelId, glamaDefaultModelId } from "../../shared/api" import { findLast } from "../../shared/array" import { supportPrompt } from "../../shared/support-prompt" import { GlobalFileNames } from "../../shared/globalFileNames" @@ -1520,8 
+1522,10 @@ export class ClineProvider maxReadFileLine: stateValues.maxReadFileLine ?? -1, maxConcurrentFileReads: experiments.isEnabled( stateValues.experiments ?? experimentDefault, - EXPERIMENT_IDS.CONCURRENT_FILE_READS - ) ? (stateValues.maxConcurrentFileReads ?? 15) : 1, + EXPERIMENT_IDS.CONCURRENT_FILE_READS, + ) + ? (stateValues.maxConcurrentFileReads ?? 15) + : 1, historyPreviewCollapsed: stateValues.historyPreviewCollapsed ?? false, cloudUserInfo, organizationAllowList, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 2595c048ce..659d60f31a 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -30,7 +30,7 @@ import { exportSettings, importSettings } from "../config/importExport" import { getOpenAiModels } from "../../api/providers/openai" import { getOllamaModels } from "../../api/providers/ollama" import { getVsCodeLmModels } from "../../api/providers/vscode-lm" -import { getLmStudioModels } from "../../api/providers/lmstudio" +import { getLmStudioModels } from "../../api/providers/lm-studio" import { openMention } from "../mentions" import { TelemetrySetting } from "../../shared/TelemetrySetting" import { getWorkspacePath } from "../../utils/path" @@ -1434,4 +1434,4 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We break } } -} \ No newline at end of file +} diff --git a/src/shared/__tests__/api.test.ts b/src/shared/__tests__/api.test.ts index b71b68095f..d003c3bd81 100644 --- a/src/shared/__tests__/api.test.ts +++ b/src/shared/__tests__/api.test.ts @@ -1,8 +1,6 @@ // npx jest src/shared/__tests__/api.test.ts -import type { ModelInfo, ProviderSettings } from "@roo-code/types" - -import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../../api/providers/constants" +import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types" import { getModelMaxOutputTokens, shouldUseReasoningBudget, shouldUseReasoningEffort } from "../api" diff --git a/src/shared/api.ts b/src/shared/api.ts index 8e26523a07..8ad8828658 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -1,1976 +1,10 @@ -import type { ModelInfo, ProviderSettings } from "@roo-code/types" +import { type ModelInfo, type ProviderSettings, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types" -import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../api/providers/constants" +// ApiHandlerOptions export type ApiHandlerOptions = Omit -// Anthropic -// https://docs.anthropic.com/en/docs/about-claude/models -export type AnthropicModelId = keyof typeof anthropicModels -export const anthropicDefaultModelId: AnthropicModelId = "claude-sonnet-4-20250514" -export const anthropicModels = { - "claude-sonnet-4-20250514": { - maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false. - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, // $3 per million input tokens - outputPrice: 15.0, // $15 per million output tokens - cacheWritesPrice: 3.75, // $3.75 per million tokens - cacheReadsPrice: 0.3, // $0.30 per million tokens - supportsReasoningBudget: true, - }, - "claude-opus-4-20250514": { - maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. 
- contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 15.0, // $15 per million input tokens - outputPrice: 75.0, // $75 per million output tokens - cacheWritesPrice: 18.75, // $18.75 per million tokens - cacheReadsPrice: 1.5, // $1.50 per million tokens - supportsReasoningBudget: true, - }, - "claude-3-7-sonnet-20250219:thinking": { - maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k. - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, // $3 per million input tokens - outputPrice: 15.0, // $15 per million output tokens - cacheWritesPrice: 3.75, // $3.75 per million tokens - cacheReadsPrice: 0.3, // $0.30 per million tokens - supportsReasoningBudget: true, - requiredReasoningBudget: true, - }, - "claude-3-7-sonnet-20250219": { - maxTokens: 8192, // Since we already have a `:thinking` virtual model we aren't setting `supportsReasoningBudget: true` here. - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, // $3 per million input tokens - outputPrice: 15.0, // $15 per million output tokens - cacheWritesPrice: 3.75, // $3.75 per million tokens - cacheReadsPrice: 0.3, // $0.30 per million tokens - }, - "claude-3-5-sonnet-20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, // $3 per million input tokens - outputPrice: 15.0, // $15 per million output tokens - cacheWritesPrice: 3.75, // $3.75 per million tokens - cacheReadsPrice: 0.3, // $0.30 per million tokens - }, - "claude-3-5-haiku-20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.0, - outputPrice: 5.0, - cacheWritesPrice: 1.25, - cacheReadsPrice: 0.1, - }, - "claude-3-opus-20240229": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 15.0, - outputPrice: 75.0, - cacheWritesPrice: 18.75, - cacheReadsPrice: 1.5, - }, - "claude-3-haiku-20240307": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.25, - outputPrice: 1.25, - cacheWritesPrice: 0.3, - cacheReadsPrice: 0.03, - }, -} as const satisfies Record // as const assertion makes the object deeply readonly - -// Amazon Bedrock -// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html -export interface MessageContent { - type: "text" | "image" | "video" | "tool_use" | "tool_result" - text?: string - source?: { - type: "base64" - data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - } - // Video specific fields - format?: string - s3Location?: { - uri: string - bucketOwner?: string - } - // Tool use and result fields - toolUseId?: string - name?: string - input?: any - output?: any // Used for tool_result type -} - -export type BedrockModelId = keyof typeof bedrockModels -export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-sonnet-4-20250514-v1:0" -export const bedrockDefaultPromptRouterModelId: BedrockModelId = "anthropic.claude-3-sonnet-20240229-v1:0" - -// March, 12 2025 - updated prices to match US-West-2 list price shown at https://aws.amazon.com/bedrock/pricing/ -// including older models that are 
part of the default prompt routers AWS enabled for GA of the promot router feature -export const bedrockModels = { - "amazon.nova-pro-v1:0": { - maxTokens: 5000, - contextWindow: 300_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: true, - inputPrice: 0.8, - outputPrice: 3.2, - cacheWritesPrice: 0.8, // per million tokens - cacheReadsPrice: 0.2, // per million tokens - minTokensPerCachePoint: 1, - maxCachePoints: 1, - cachableFields: ["system"], - }, - "amazon.nova-pro-latency-optimized-v1:0": { - maxTokens: 5000, - contextWindow: 300_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 1.0, - outputPrice: 4.0, - cacheWritesPrice: 1.0, // per million tokens - cacheReadsPrice: 0.25, // per million tokens - description: "Amazon Nova Pro with latency optimized inference", - }, - "amazon.nova-lite-v1:0": { - maxTokens: 5000, - contextWindow: 300_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: true, - inputPrice: 0.06, - outputPrice: 0.24, - cacheWritesPrice: 0.06, // per million tokens - cacheReadsPrice: 0.015, // per million tokens - minTokensPerCachePoint: 1, - maxCachePoints: 1, - cachableFields: ["system"], - }, - "amazon.nova-micro-v1:0": { - maxTokens: 5000, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: true, - inputPrice: 0.035, - outputPrice: 0.14, - cacheWritesPrice: 0.035, // per million tokens - cacheReadsPrice: 0.00875, // per million tokens - minTokensPerCachePoint: 1, - maxCachePoints: 1, - cachableFields: ["system"], - }, - "anthropic.claude-sonnet-4-20250514-v1:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - minTokensPerCachePoint: 1024, - maxCachePoints: 4, - cachableFields: ["system", "messages", "tools"], - }, - "anthropic.claude-opus-4-20250514-v1:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 15.0, - outputPrice: 75.0, - cacheWritesPrice: 18.75, - cacheReadsPrice: 1.5, - minTokensPerCachePoint: 1024, - maxCachePoints: 4, - cachableFields: ["system", "messages", "tools"], - }, - "anthropic.claude-3-7-sonnet-20250219-v1:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - minTokensPerCachePoint: 1024, - maxCachePoints: 4, - cachableFields: ["system", "messages", "tools"], - }, - "anthropic.claude-3-5-sonnet-20241022-v2:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - minTokensPerCachePoint: 1024, - maxCachePoints: 4, - cachableFields: ["system", "messages", "tools"], - }, - "anthropic.claude-3-5-haiku-20241022-v1:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 0.8, - outputPrice: 4.0, - cacheWritesPrice: 1.0, - cacheReadsPrice: 0.08, - minTokensPerCachePoint: 2048, - maxCachePoints: 4, - cachableFields: ["system", "messages", "tools"], - }, - "anthropic.claude-3-5-sonnet-20240620-v1:0": { - maxTokens: 8192, - 
contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - }, - "anthropic.claude-3-opus-20240229-v1:0": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 15.0, - outputPrice: 75.0, - }, - "anthropic.claude-3-sonnet-20240229-v1:0": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - }, - "anthropic.claude-3-haiku-20240307-v1:0": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.25, - outputPrice: 1.25, - }, - "anthropic.claude-2-1-v1:0": { - maxTokens: 4096, - contextWindow: 100_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 8.0, - outputPrice: 24.0, - description: "Claude 2.1", - }, - "anthropic.claude-2-0-v1:0": { - maxTokens: 4096, - contextWindow: 100_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 8.0, - outputPrice: 24.0, - description: "Claude 2.0", - }, - "anthropic.claude-instant-v1:0": { - maxTokens: 4096, - contextWindow: 100_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.8, - outputPrice: 2.4, - description: "Claude Instant", - }, - "deepseek.r1-v1:0": { - maxTokens: 32_768, - contextWindow: 128_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.35, - outputPrice: 5.4, - }, - "meta.llama3-3-70b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.72, - outputPrice: 0.72, - description: "Llama 3.3 Instruct (70B)", - }, - "meta.llama3-2-90b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.72, - outputPrice: 0.72, - description: "Llama 3.2 Instruct (90B)", - }, - "meta.llama3-2-11b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.16, - outputPrice: 0.16, - description: "Llama 3.2 Instruct (11B)", - }, - "meta.llama3-2-3b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 0.15, - description: "Llama 3.2 Instruct (3B)", - }, - "meta.llama3-2-1b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.1, - outputPrice: 0.1, - description: "Llama 3.2 Instruct (1B)", - }, - "meta.llama3-1-405b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 2.4, - outputPrice: 2.4, - description: "Llama 3.1 Instruct (405B)", - }, - "meta.llama3-1-70b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.72, - outputPrice: 0.72, - description: "Llama 3.1 Instruct (70B)", - }, - "meta.llama3-1-70b-instruct-latency-optimized-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.9, - outputPrice: 0.9, - description: "Llama 3.1 Instruct 
(70B) (w/ latency optimized inference)", - }, - "meta.llama3-1-8b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.22, - outputPrice: 0.22, - description: "Llama 3.1 Instruct (8B)", - }, - "meta.llama3-70b-instruct-v1:0": { - maxTokens: 2048, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 2.65, - outputPrice: 3.5, - }, - "meta.llama3-8b-instruct-v1:0": { - maxTokens: 2048, - contextWindow: 4_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.3, - outputPrice: 0.6, - }, - "amazon.titan-text-lite-v1:0": { - maxTokens: 4096, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 0.2, - description: "Amazon Titan Text Lite", - }, - "amazon.titan-text-express-v1:0": { - maxTokens: 4096, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.2, - outputPrice: 0.6, - description: "Amazon Titan Text Express", - }, - "amazon.titan-text-embeddings-v1:0": { - maxTokens: 8192, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.1, - description: "Amazon Titan Text Embeddings", - }, - "amazon.titan-text-embeddings-v2:0": { - maxTokens: 8192, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.02, - description: "Amazon Titan Text Embeddings V2", - }, -} as const satisfies Record - -// Glama -// https://glama.ai/models -export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet" -export const glamaDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - description: - "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", -} - -// Requesty -// https://requesty.ai/router-2 -export const requestyDefaultModelId = "coding/claude-4-sonnet" -export const requestyDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - description: - "The best coding model, optimized by Requesty, and automatically routed to the fastest provider. 
Claude 4 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.", -} - -// OpenRouter -// https://openrouter.ai/models?order=newest&supported_parameters=tools -export const openRouterDefaultModelId = "anthropic/claude-sonnet-4" -export const openRouterDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - description: - "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", -} - -// Vertex AI -// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude -export type VertexModelId = keyof typeof vertexModels -export const vertexDefaultModelId: VertexModelId = "claude-sonnet-4@20250514" -export const vertexModels = { - "gemini-2.5-flash-preview-05-20:thinking": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, - outputPrice: 3.5, - maxThinkingTokens: 24_576, - supportsReasoningBudget: true, - requiredReasoningBudget: true, - }, - "gemini-2.5-flash-preview-05-20": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, - outputPrice: 0.6, - }, - "gemini-2.5-flash-preview-04-17:thinking": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 3.5, - maxThinkingTokens: 24_576, - supportsReasoningBudget: true, - requiredReasoningBudget: true, - }, - "gemini-2.5-flash-preview-04-17": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 0.6, - }, - "gemini-2.5-pro-preview-03-25": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 15, - }, - "gemini-2.5-pro-preview-05-06": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 15, - }, - "gemini-2.5-pro-exp-03-25": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-pro-exp-02-05": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-flash-001": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, - outputPrice: 0.6, - }, - "gemini-2.0-flash-lite-001": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - 
supportsPromptCache: false, - inputPrice: 0.075, - outputPrice: 0.3, - }, - "gemini-2.0-flash-thinking-exp-01-21": { - maxTokens: 8192, - contextWindow: 32_768, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-flash-002": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.075, - outputPrice: 0.3, - }, - "gemini-1.5-pro-002": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 1.25, - outputPrice: 5, - }, - "claude-sonnet-4@20250514": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - supportsReasoningBudget: true, - }, - "claude-opus-4@20250514": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 15.0, - outputPrice: 75.0, - cacheWritesPrice: 18.75, - cacheReadsPrice: 1.5, - }, - "claude-3-7-sonnet@20250219:thinking": { - maxTokens: 64_000, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - supportsReasoningBudget: true, - requiredReasoningBudget: true, - }, - "claude-3-7-sonnet@20250219": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - }, - "claude-3-5-sonnet-v2@20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - }, - "claude-3-5-sonnet@20240620": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - }, - "claude-3-5-haiku@20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.0, - outputPrice: 5.0, - cacheWritesPrice: 1.25, - cacheReadsPrice: 0.1, - }, - "claude-3-opus@20240229": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 15.0, - outputPrice: 75.0, - cacheWritesPrice: 18.75, - cacheReadsPrice: 1.5, - }, - "claude-3-haiku@20240307": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.25, - outputPrice: 1.25, - cacheWritesPrice: 0.3, - cacheReadsPrice: 0.03, - }, -} as const satisfies Record<string, ModelInfo> - -export const openAiModelInfoSaneDefaults: ModelInfo = { - maxTokens: -1, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, -} - -// Gemini -// https://ai.google.dev/gemini-api/docs/models/gemini -export type GeminiModelId = keyof typeof geminiModels -export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001" -export const geminiModels = { - "gemini-2.5-flash-preview-04-17:thinking": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 3.5, - maxThinkingTokens:
24_576, - supportsReasoningBudget: true, - requiredReasoningBudget: true, - }, - "gemini-2.5-flash-preview-04-17": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 0.6, - }, - "gemini-2.5-flash-preview-05-20:thinking": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, - outputPrice: 3.5, - cacheReadsPrice: 0.0375, - cacheWritesPrice: 1.0, - maxThinkingTokens: 24_576, - supportsReasoningBudget: true, - requiredReasoningBudget: true, - }, - "gemini-2.5-flash-preview-05-20": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, - outputPrice: 0.6, - cacheReadsPrice: 0.0375, - cacheWritesPrice: 1.0, - }, - "gemini-2.5-pro-exp-03-25": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.5-pro-preview-03-25": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. - outputPrice: 15, - cacheReadsPrice: 0.625, - cacheWritesPrice: 4.5, - tiers: [ - { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, - inputPrice: 2.5, - outputPrice: 15, - cacheReadsPrice: 0.625, - }, - ], - }, - "gemini-2.5-pro-preview-05-06": { - maxTokens: 65_535, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. - outputPrice: 15, - cacheReadsPrice: 0.625, - cacheWritesPrice: 4.5, - tiers: [ - { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, - inputPrice: 2.5, - outputPrice: 15, - cacheReadsPrice: 0.625, - }, - ], - }, - "gemini-2.0-flash-001": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.1, - outputPrice: 0.4, - cacheReadsPrice: 0.025, - cacheWritesPrice: 1.0, - }, - "gemini-2.0-flash-lite-preview-02-05": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-pro-exp-02-05": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-flash-thinking-exp-01-21": { - maxTokens: 65_536, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-flash-thinking-exp-1219": { - maxTokens: 8192, - contextWindow: 32_767, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-flash-exp": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-flash-002": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, // This is the pricing for prompts above 128k tokens. 
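The tiers arrays above encode size-dependent pricing: each tier applies to prompts up to its contextWindow (listed smallest first, ending at Infinity), and the top-level prices are the over-threshold rates, as the inline comments note. A minimal sketch of tier selection under that reading; effectiveInputPrice is an illustrative helper, not something this diff defines:

import type { ModelInfo } from "../model.js"

// Illustrative only: pick the first tier whose contextWindow covers the
// prompt, falling back to the model's top-level rate when no tier matches
// or no tiers are defined.
function effectiveInputPrice(model: ModelInfo, promptTokens: number): number | undefined {
	const tier = model.tiers?.find((t) => promptTokens <= t.contextWindow)
	return tier?.inputPrice ?? model.inputPrice
}

// For "gemini-2.5-pro-preview-05-06": 150_000 prompt tokens => 1.25; 300_000 => 2.5.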
- outputPrice: 0.6, - cacheReadsPrice: 0.0375, - cacheWritesPrice: 1.0, - tiers: [ - { - contextWindow: 128_000, - inputPrice: 0.075, - outputPrice: 0.3, - cacheReadsPrice: 0.01875, - }, - { - contextWindow: Infinity, - inputPrice: 0.15, - outputPrice: 0.6, - cacheReadsPrice: 0.0375, - }, - ], - }, - "gemini-1.5-flash-exp-0827": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-flash-8b-exp-0827": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-pro-002": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-pro-exp-0827": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-exp-1206": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, -} as const satisfies Record<string, ModelInfo> - -// OpenAI Native -// https://openai.com/api/pricing/ -export type OpenAiNativeModelId = keyof typeof openAiNativeModels -export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4.1" -export const openAiNativeModels = { - "gpt-4.1": { - maxTokens: 32_768, - contextWindow: 1_047_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 2, - outputPrice: 8, - cacheReadsPrice: 0.5, - }, - "gpt-4.1-mini": { - maxTokens: 32_768, - contextWindow: 1_047_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.4, - outputPrice: 1.6, - cacheReadsPrice: 0.1, - }, - "gpt-4.1-nano": { - maxTokens: 32_768, - contextWindow: 1_047_576, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.1, - outputPrice: 0.4, - cacheReadsPrice: 0.025, - }, - o3: { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 10.0, - outputPrice: 40.0, - cacheReadsPrice: 2.5, - supportsReasoningEffort: true, - reasoningEffort: "medium", - }, - "o3-high": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 10.0, - outputPrice: 40.0, - cacheReadsPrice: 2.5, - reasoningEffort: "high", - }, - "o3-low": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 10.0, - outputPrice: 40.0, - cacheReadsPrice: 2.5, - reasoningEffort: "low", - }, - "o4-mini": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.275, - supportsReasoningEffort: true, - reasoningEffort: "medium", - }, - "o4-mini-high": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.275, - reasoningEffort: "high", - }, - "o4-mini-low": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.275, - reasoningEffort: "low", - }, - "o3-mini": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.55, - supportsReasoningEffort: true, - reasoningEffort: "medium", - },
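The o3 / o3-high / o3-low triple above (and the o4-mini and o3-mini triples around it) uses the virtual-id pattern again: identical limits and pricing, with reasoningEffort pinned on the -high/-low entries and the supportsReasoningEffort toggle exposed only on the base id. One plausible resolution order, sketched with illustrative names (this is not the shouldUseReasoningEffort helper that appears later in this file):

type Effort = "low" | "medium" | "high"

// Illustrative only: a user-level setting applies where the model exposes
// the toggle; otherwise the effort pinned on the (possibly virtual) model
// entry, if any, is used as-is.
function resolveReasoningEffort(
	model: { supportsReasoningEffort?: boolean; reasoningEffort?: Effort },
	settings: { reasoningEffort?: Effort },
): Effort | undefined {
	return model.supportsReasoningEffort ? (settings.reasoningEffort ?? model.reasoningEffort) : model.reasoningEffort
}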
reasoningEffort: "medium", - }, - "o3-mini-high": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.55, - reasoningEffort: "high", - }, - "o3-mini-low": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.55, - reasoningEffort: "low", - }, - o1: { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 15, - outputPrice: 60, - cacheReadsPrice: 7.5, - }, - "o1-preview": { - maxTokens: 32_768, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 15, - outputPrice: 60, - cacheReadsPrice: 7.5, - }, - "o1-mini": { - maxTokens: 65_536, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 1.1, - outputPrice: 4.4, - cacheReadsPrice: 0.55, - }, - "gpt-4.5-preview": { - maxTokens: 16_384, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 75, - outputPrice: 150, - cacheReadsPrice: 37.5, - }, - "gpt-4o": { - maxTokens: 16_384, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 10, - cacheReadsPrice: 1.25, - }, - "gpt-4o-mini": { - maxTokens: 16_384, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.15, - outputPrice: 0.6, - cacheReadsPrice: 0.075, - }, -} as const satisfies Record - -// DeepSeek -// https://platform.deepseek.com/docs/api -export type DeepSeekModelId = keyof typeof deepSeekModels -export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat" -export const deepSeekModels = { - "deepseek-chat": { - maxTokens: 8192, - contextWindow: 64_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 0.27, // $0.27 per million tokens (cache miss) - outputPrice: 1.1, // $1.10 per million tokens - cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss) - cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit). - description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`, - }, - "deepseek-reasoner": { - maxTokens: 8192, - contextWindow: 64_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 0.55, // $0.55 per million tokens (cache miss) - outputPrice: 2.19, // $2.19 per million tokens - cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss) - cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit) - description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. 
Supports Chain of Thought reasoning with up to 32K tokens.`, - }, -} as const satisfies Record<string, ModelInfo> - -// Azure OpenAI -// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation -// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs -export const azureOpenAiDefaultApiVersion = "2024-08-01-preview" - -// Mistral -// https://docs.mistral.ai/getting-started/models/models_overview/ -export type MistralModelId = keyof typeof mistralModels -export const mistralDefaultModelId: MistralModelId = "codestral-latest" -export const mistralModels = { - "codestral-latest": { - maxTokens: 256_000, - contextWindow: 256_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.3, - outputPrice: 0.9, - }, - "mistral-large-latest": { - maxTokens: 131_000, - contextWindow: 131_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 6.0, - }, - "ministral-8b-latest": { - maxTokens: 131_000, - contextWindow: 131_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.1, - outputPrice: 0.1, - }, - "ministral-3b-latest": { - maxTokens: 131_000, - contextWindow: 131_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.04, - outputPrice: 0.04, - }, - "mistral-small-latest": { - maxTokens: 32_000, - contextWindow: 32_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.2, - outputPrice: 0.6, - }, - "pixtral-large-latest": { - maxTokens: 131_000, - contextWindow: 131_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 6.0, - }, -} as const satisfies Record<string, ModelInfo> - -// Unbound Security -// https://www.unboundsecurity.ai/ai-gateway -export const unboundDefaultModelId = "anthropic/claude-3-7-sonnet-20250219" -export const unboundDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, -} - -// LiteLLM -// https://docs.litellm.ai/ -export const litellmDefaultModelId = "claude-3-7-sonnet-20250219" -export const litellmDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, -} - -export const LITELLM_COMPUTER_USE_MODELS = new Set([ - "claude-3-5-sonnet-latest", - "claude-opus-4-20250514", - "claude-sonnet-4-20250514", - "claude-3-7-sonnet-latest", - "claude-3-7-sonnet-20250219", - "claude-3-5-sonnet-20241022", - "vertex_ai/claude-3-5-sonnet", - "vertex_ai/claude-3-5-sonnet-v2", - "vertex_ai/claude-3-5-sonnet-v2@20241022", - "vertex_ai/claude-3-7-sonnet@20250219", - "vertex_ai/claude-opus-4@20250514", - "vertex_ai/claude-sonnet-4@20250514", - "openrouter/anthropic/claude-3.5-sonnet", - "openrouter/anthropic/claude-3.5-sonnet:beta", - "openrouter/anthropic/claude-3.7-sonnet", - "openrouter/anthropic/claude-3.7-sonnet:beta", - "anthropic.claude-opus-4-20250514-v1:0", - "anthropic.claude-sonnet-4-20250514-v1:0", - "anthropic.claude-3-7-sonnet-20250219-v1:0", - "anthropic.claude-3-5-sonnet-20241022-v2:0", - "us.anthropic.claude-3-5-sonnet-20241022-v2:0", - "us.anthropic.claude-3-7-sonnet-20250219-v1:0", - "us.anthropic.claude-opus-4-20250514-v1:0", - "us.anthropic.claude-sonnet-4-20250514-v1:0", - "eu.anthropic.claude-3-5-sonnet-20241022-v2:0", -
"eu.anthropic.claude-3-7-sonnet-20250219-v1:0", - "eu.anthropic.claude-opus-4-20250514-v1:0", - "eu.anthropic.claude-sonnet-4-20250514-v1:0", - "snowflake/claude-3-5-sonnet", -]) - -// xAI -// https://docs.x.ai/docs/api-reference -export type XAIModelId = keyof typeof xaiModels -export const xaiDefaultModelId: XAIModelId = "grok-3" -export const xaiModels = { - "grok-3-beta": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - description: "xAI's Grok-3 beta model with 131K context window", - }, - "grok-3-fast-beta": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 5.0, - outputPrice: 25.0, - description: "xAI's Grok-3 fast beta model with 131K context window", - }, - "grok-3-mini-beta": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.3, - outputPrice: 0.5, - description: "xAI's Grok-3 mini beta model with 131K context window", - supportsReasoningEffort: true, - }, - "grok-3-mini-fast-beta": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.6, - outputPrice: 4.0, - description: "xAI's Grok-3 mini fast beta model with 131K context window", - supportsReasoningEffort: true, - }, - "grok-3": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - description: "xAI's Grok-3 model with 131K context window", - }, - "grok-3-fast": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 5.0, - outputPrice: 25.0, - description: "xAI's Grok-3 fast model with 131K context window", - }, - "grok-3-mini": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.3, - outputPrice: 0.5, - description: "xAI's Grok-3 mini model with 131K context window", - supportsReasoningEffort: true, - }, - "grok-3-mini-fast": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.6, - outputPrice: 4.0, - description: "xAI's Grok-3 mini fast model with 131K context window", - supportsReasoningEffort: true, - }, - "grok-2-latest": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 10.0, - description: "xAI's Grok-2 model - latest version with 131K context window", - }, - "grok-2": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 10.0, - description: "xAI's Grok-2 model with 131K context window", - }, - "grok-2-1212": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 10.0, - description: "xAI's Grok-2 model (version 1212) with 131K context window", - }, - "grok-2-vision-latest": { - maxTokens: 8192, - contextWindow: 32768, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 10.0, - description: "xAI's Grok-2 Vision model - latest version with image support and 32K context window", - }, - "grok-2-vision": { - maxTokens: 8192, - contextWindow: 32768, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 10.0, - description: "xAI's Grok-2 Vision model with 
image support and 32K context window", - }, - "grok-2-vision-1212": { - maxTokens: 8192, - contextWindow: 32768, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 2.0, - outputPrice: 10.0, - description: "xAI's Grok-2 Vision model (version 1212) with image support and 32K context window", - }, - "grok-vision-beta": { - maxTokens: 8192, - contextWindow: 8192, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 5.0, - outputPrice: 15.0, - description: "xAI's Grok Vision Beta model with image support and 8K context window", - }, - "grok-beta": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 5.0, - outputPrice: 15.0, - description: "xAI's Grok Beta model (legacy) with 131K context window", - }, -} as const satisfies Record<string, ModelInfo> - -export type VscodeLlmModelId = keyof typeof vscodeLlmModels -export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet" -export const vscodeLlmModels = { - "gpt-3.5-turbo": { - contextWindow: 12114, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gpt-3.5-turbo", - version: "gpt-3.5-turbo-0613", - name: "GPT 3.5 Turbo", - supportsToolCalling: true, - maxInputTokens: 12114, - }, - "gpt-4o-mini": { - contextWindow: 12115, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gpt-4o-mini", - version: "gpt-4o-mini-2024-07-18", - name: "GPT-4o mini", - supportsToolCalling: true, - maxInputTokens: 12115, - }, - "gpt-4": { - contextWindow: 28501, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gpt-4", - version: "gpt-4-0613", - name: "GPT 4", - supportsToolCalling: true, - maxInputTokens: 28501, - }, - "gpt-4-0125-preview": { - contextWindow: 63826, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gpt-4-turbo", - version: "gpt-4-0125-preview", - name: "GPT 4 Turbo", - supportsToolCalling: true, - maxInputTokens: 63826, - }, - "gpt-4o": { - contextWindow: 63827, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gpt-4o", - version: "gpt-4o-2024-11-20", - name: "GPT-4o", - supportsToolCalling: true, - maxInputTokens: 63827, - }, - o1: { - contextWindow: 19827, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "o1-ga", - version: "o1-2024-12-17", - name: "o1 (Preview)", - supportsToolCalling: true, - maxInputTokens: 19827, - }, - "o3-mini": { - contextWindow: 63827, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "o3-mini", - version: "o3-mini-2025-01-31", - name: "o3-mini", - supportsToolCalling: true, - maxInputTokens: 63827, - }, - "claude-3.5-sonnet": { - contextWindow: 81638, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "claude-3.5-sonnet", - version: "claude-3.5-sonnet", - name: "Claude 3.5 Sonnet", - supportsToolCalling: true, - maxInputTokens: 81638, - }, - "gemini-2.0-flash-001": { - contextWindow: 127827, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gemini-2.0-flash", - version: "gemini-2.0-flash-001", - name: "Gemini 2.0 Flash", - supportsToolCalling: false, - maxInputTokens: 127827, - }, - "gemini-2.5-pro": { - contextWindow: 63830, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gemini-2.5-pro", - version: "gemini-2.5-pro-preview-03-25", - name: "Gemini 2.5 Pro (Preview)", - supportsToolCalling: true, - maxInputTokens: 63830, - },
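The vscodeLlmModels table around this point closes with the same idiom every table in this file uses: as const satisfies Record<string, ModelInfo> (here intersected with extra VS Code metadata). satisfies type-checks each entry without widening, and as const keeps the literal keys, which is what lets keyof typeof produce the exact id unions used throughout. A standalone illustration with a pared-down stand-in for ModelInfo:

// Pared-down stand-in type, for illustration only.
interface MiniModelInfo {
	contextWindow: number
	supportsImages: boolean
}

// `satisfies` validates every entry against MiniModelInfo; `as const`
// preserves the literal keys, so MiniModelId is "model-a" | "model-b",
// not just `string`.
const miniModels = {
	"model-a": { contextWindow: 200_000, supportsImages: true },
	"model-b": { contextWindow: 128_000, supportsImages: false },
} as const satisfies Record<string, MiniModelInfo>

type MiniModelId = keyof typeof miniModels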
- "o4-mini": { - contextWindow: 111446, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "o4-mini", - version: "o4-mini-2025-04-16", - name: "o4-mini (Preview)", - supportsToolCalling: true, - maxInputTokens: 111446, - }, - "gpt-4.1": { - contextWindow: 111446, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - family: "gpt-4.1", - version: "gpt-4.1-2025-04-14", - name: "GPT-4.1 (Preview)", - supportsToolCalling: true, - maxInputTokens: 111446, - }, -} as const satisfies Record< - string, - ModelInfo & { - family: string - version: string - name: string - supportsToolCalling: boolean - maxInputTokens: number - } -> - -// Groq -// https://console.groq.com/docs/models -export type GroqModelId = - | "llama-3.1-8b-instant" - | "llama-3.3-70b-versatile" - | "meta-llama/llama-4-scout-17b-16e-instruct" - | "meta-llama/llama-4-maverick-17b-128e-instruct" - | "mistral-saba-24b" - | "qwen-qwq-32b" - | "deepseek-r1-distill-llama-70b" -export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" // Defaulting to Llama3 70B Versatile -export const groqModels = { - // Models based on API response: https://api.groq.com/openai/v1/models - "llama-3.1-8b-instant": { - maxTokens: 131072, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Meta Llama 3.1 8B Instant model, 128K context.", - }, - "llama-3.3-70b-versatile": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Meta Llama 3.3 70B Versatile model, 128K context.", - }, - "meta-llama/llama-4-scout-17b-16e-instruct": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Meta Llama 4 Scout 17B Instruct model, 128K context.", - }, - "meta-llama/llama-4-maverick-17b-128e-instruct": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.", - }, - "mistral-saba-24b": { - maxTokens: 32768, - contextWindow: 32768, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Mistral Saba 24B model, 32K context.", - }, - "qwen-qwq-32b": { - maxTokens: 131072, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Alibaba Qwen QwQ 32B model, 128K context.", - }, - "deepseek-r1-distill-llama-70b": { - maxTokens: 131072, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 Distill Llama 70B model, 128K context.", - }, -} as const satisfies Record<string, ModelInfo> - -// Chutes AI -// https://llm.chutes.ai/v1 (OpenAI compatible) -export type ChutesModelId = - | "deepseek-ai/DeepSeek-R1-0528" - | "deepseek-ai/DeepSeek-R1" - | "deepseek-ai/DeepSeek-V3" - | "unsloth/Llama-3.3-70B-Instruct" - | "chutesai/Llama-4-Scout-17B-16E-Instruct" - | "unsloth/Mistral-Nemo-Instruct-2407" - | "unsloth/gemma-3-12b-it" - | "NousResearch/DeepHermes-3-Llama-3-8B-Preview" - |
"unsloth/gemma-3-4b-it" - | "nvidia/Llama-3_3-Nemotron-Super-49B-v1" - | "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1" - | "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8" - | "deepseek-ai/DeepSeek-V3-Base" - | "deepseek-ai/DeepSeek-R1-Zero" - | "deepseek-ai/DeepSeek-V3-0324" - | "Qwen/Qwen3-235B-A22B" - | "Qwen/Qwen3-32B" - | "Qwen/Qwen3-30B-A3B" - | "Qwen/Qwen3-14B" - | "Qwen/Qwen3-8B" - | "microsoft/MAI-DS-R1-FP8" - | "tngtech/DeepSeek-R1T-Chimera" - -export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1-0528" -export const chutesModels = { - "deepseek-ai/DeepSeek-R1-0528": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 0528 model.", - }, - "deepseek-ai/DeepSeek-R1": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 model.", - }, - "deepseek-ai/DeepSeek-V3": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3 model.", - }, - "unsloth/Llama-3.3-70B-Instruct": { - maxTokens: 32768, // From Groq - contextWindow: 131072, // From Groq - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Llama 3.3 70B Instruct model.", - }, - "chutesai/Llama-4-Scout-17B-16E-Instruct": { - maxTokens: 32768, - contextWindow: 512000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context.", - }, - "unsloth/Mistral-Nemo-Instruct-2407": { - maxTokens: 32768, - contextWindow: 128000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Mistral Nemo Instruct model.", - }, - "unsloth/gemma-3-12b-it": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Gemma 3 12B IT model.", - }, - "NousResearch/DeepHermes-3-Llama-3-8B-Preview": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Nous DeepHermes 3 Llama 3 8B Preview model.", - }, - "unsloth/gemma-3-4b-it": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Gemma 3 4B IT model.", - }, - "nvidia/Llama-3_3-Nemotron-Super-49B-v1": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Nvidia Llama 3.3 Nemotron Super 49B model.", - }, - "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Nvidia Llama 3.1 Nemotron Ultra 253B model.", - }, - "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": { - maxTokens: 32768, - contextWindow: 256000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model.", - }, - "deepseek-ai/DeepSeek-V3-Base": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - 
inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3 Base model.", - }, - "deepseek-ai/DeepSeek-R1-Zero": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 Zero model.", - }, - "deepseek-ai/DeepSeek-V3-0324": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3 (0324) model.", - }, - "Qwen/Qwen3-235B-A22B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 235B A22B model.", - }, - "Qwen/Qwen3-32B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 32B model.", - }, - "Qwen/Qwen3-30B-A3B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 30B A3B model.", - }, - "Qwen/Qwen3-14B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 14B model.", - }, - "Qwen/Qwen3-8B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 8B model.", - }, - "microsoft/MAI-DS-R1-FP8": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Microsoft MAI-DS-R1 FP8 model.", - }, - "tngtech/DeepSeek-R1T-Chimera": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "TNGTech DeepSeek R1T Chimera model.", - }, -} as const satisfies Record<string, ModelInfo> - -/** - * Constants - */ -
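Model ids from gateways like OpenRouter arrive as plain strings, so the capability lookups these constants support reduce to set membership. A minimal usage sketch against the set defined immediately below (the wrapper name is illustrative, not part of this diff):

// Illustrative wrapper; the set itself is defined just below.
function supportsOpenRouterPromptCaching(modelId: string): boolean {
	return OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)
}

// supportsOpenRouterPromptCaching("anthropic/claude-sonnet-4") // => true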
-// These models support prompt caching. -export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([ - "anthropic/claude-3-haiku", - "anthropic/claude-3-haiku:beta", - "anthropic/claude-3-opus", - "anthropic/claude-3-opus:beta", - "anthropic/claude-3-sonnet", - "anthropic/claude-3-sonnet:beta", - "anthropic/claude-3.5-haiku", - "anthropic/claude-3.5-haiku-20241022", - "anthropic/claude-3.5-haiku-20241022:beta", - "anthropic/claude-3.5-haiku:beta", - "anthropic/claude-3.5-sonnet", - "anthropic/claude-3.5-sonnet-20240620", - "anthropic/claude-3.5-sonnet-20240620:beta", - "anthropic/claude-3.5-sonnet:beta", - "anthropic/claude-3.7-sonnet", - "anthropic/claude-3.7-sonnet:beta", - "anthropic/claude-3.7-sonnet:thinking", - "anthropic/claude-sonnet-4", - "anthropic/claude-opus-4", - "google/gemini-2.5-pro-preview", - "google/gemini-2.5-flash-preview", - "google/gemini-2.5-flash-preview:thinking", - "google/gemini-2.5-flash-preview-05-20", - "google/gemini-2.5-flash-preview-05-20:thinking", - "google/gemini-2.0-flash-001", - "google/gemini-flash-1.5", - "google/gemini-flash-1.5-8b", -]) - -// https://www.anthropic.com/news/3-5-models-and-computer-use -export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([ - "anthropic/claude-3.5-sonnet", - "anthropic/claude-3.5-sonnet:beta", - "anthropic/claude-3.7-sonnet", - "anthropic/claude-3.7-sonnet:beta", - "anthropic/claude-3.7-sonnet:thinking", - "anthropic/claude-sonnet-4", - "anthropic/claude-opus-4", -]) - -export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([ - "anthropic/claude-3.7-sonnet:beta", - "anthropic/claude-3.7-sonnet:thinking", - "anthropic/claude-opus-4", - "anthropic/claude-sonnet-4", - "google/gemini-2.5-flash-preview-05-20", - "google/gemini-2.5-flash-preview-05-20:thinking", -]) - -export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([ - "anthropic/claude-3.7-sonnet:thinking", - "google/gemini-2.5-flash-preview-05-20:thinking", -]) +// RouterName const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const @@ -1986,10 +20,14 @@ export function toRouterName(value?: string): RouterName { throw new Error(`Invalid router name: ${value}`) } +// RouterModels + export type ModelRecord = Record<string, ModelInfo> export type RouterModels = Record<RouterName, ModelRecord> + +// Reasoning + export const shouldUseReasoningBudget = ({ model, settings, @@ -2009,6 +47,8 @@ export const shouldUseReasoningEffort = ({ export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384 export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192 +// Max Tokens + export const getModelMaxOutputTokens = ({ modelId, model, @@ -2034,11 +74,8 @@ return model.maxTokens ?? undefined } -/** - * Options for fetching models from different providers. - * This is a discriminated union type where the provider property determines - * which other properties are required.
- */ +// GetModelsOptions + export type GetModelsOptions = | { provider: "openrouter" } | { provider: "glama" } diff --git a/src/shared/aws_regions.ts b/src/shared/aws_regions.ts deleted file mode 100644 index 416197947e..0000000000 --- a/src/shared/aws_regions.ts +++ /dev/null @@ -1,80 +0,0 @@ -/** - * AWS Region information mapping - * Maps region prefixes to their full region IDs and descriptions - */ -export const AMAZON_BEDROCK_REGION_INFO: Record< - string, - { - regionId: string - description: string - pattern?: string - multiRegion?: boolean - } -> = { - /* - * This JSON generated by AWS's AI assistant - Amazon Q on March 29, 2025 - * - * - Africa (Cape Town) region does not appear to support Amazon Bedrock at this time. - * - Some Asia Pacific regions, such as Asia Pacific (Hong Kong) and Asia Pacific (Jakarta), are not listed among the supported regions for Bedrock services. - * - Middle East regions, including Middle East (Bahrain) and Middle East (UAE), are not mentioned in the list of supported regions for Bedrock. [3] - * - China regions (Beijing and Ningxia) are not listed as supported for Amazon Bedrock. - * - Some newer or specialized AWS regions may not have Bedrock support yet. - */ - "us.": { regionId: "us-east-1", description: "US East (N. Virginia)", pattern: "us-", multiRegion: true }, - "use.": { regionId: "us-east-1", description: "US East (N. Virginia)" }, - "use1.": { regionId: "us-east-1", description: "US East (N. Virginia)" }, - "use2.": { regionId: "us-east-2", description: "US East (Ohio)" }, - "usw.": { regionId: "us-west-2", description: "US West (Oregon)" }, - "usw2.": { regionId: "us-west-2", description: "US West (Oregon)" }, - "ug.": { - regionId: "us-gov-west-1", - description: "AWS GovCloud (US-West)", - pattern: "us-gov-", - multiRegion: true, - }, - "uge1.": { regionId: "us-gov-east-1", description: "AWS GovCloud (US-East)" }, - "ugw1.": { regionId: "us-gov-west-1", description: "AWS GovCloud (US-West)" }, - "eu.": { regionId: "eu-west-1", description: "Europe (Ireland)", pattern: "eu-", multiRegion: true }, - "euw1.": { regionId: "eu-west-1", description: "Europe (Ireland)" }, - "euw2.": { regionId: "eu-west-2", description: "Europe (London)" }, - "euw3.": { regionId: "eu-west-3", description: "Europe (Paris)" }, - "euc1.": { regionId: "eu-central-1", description: "Europe (Frankfurt)" }, - "euc2.": { regionId: "eu-central-2", description: "Europe (Zurich)" }, - "eun1.": { regionId: "eu-north-1", description: "Europe (Stockholm)" }, - "eus1.": { regionId: "eu-south-1", description: "Europe (Milan)" }, - "eus2.": { regionId: "eu-south-2", description: "Europe (Spain)" }, - "ap.": { - regionId: "ap-southeast-1", - description: "Asia Pacific (Singapore)", - pattern: "ap-", - multiRegion: true, - }, - "ape1.": { regionId: "ap-east-1", description: "Asia Pacific (Hong Kong)" }, - "apne1.": { regionId: "ap-northeast-1", description: "Asia Pacific (Tokyo)" }, - "apne2.": { regionId: "ap-northeast-2", description: "Asia Pacific (Seoul)" }, - "apne3.": { regionId: "ap-northeast-3", description: "Asia Pacific (Osaka)" }, - "aps1.": { regionId: "ap-south-1", description: "Asia Pacific (Mumbai)" }, - "aps2.": { regionId: "ap-south-2", description: "Asia Pacific (Hyderabad)" }, - "apse1.": { regionId: "ap-southeast-1", description: "Asia Pacific (Singapore)" }, - "apse2.": { regionId: "ap-southeast-2", description: "Asia Pacific (Sydney)" }, - "ca.": { regionId: "ca-central-1", description: "Canada (Central)", pattern: "ca-", multiRegion: true }, - "cac1.": { 
regionId: "ca-central-1", description: "Canada (Central)" }, - "sa.": { regionId: "sa-east-1", description: "South America (São Paulo)", pattern: "sa-", multiRegion: true }, - "sae1.": { regionId: "sa-east-1", description: "South America (São Paulo)" }, - - //these are not official - they weren't generated by Amazon Q nor were found in - //the AWS documentation but another roo contributor found apac. was needed so I've - //added the pattern of the other geo zones - "apac.": { regionId: "ap-southeast-1", description: "Default APAC region", pattern: "ap-", multiRegion: true }, - "emea.": { regionId: "eu-west-1", description: "Default EMEA region", pattern: "eu-", multiRegion: true }, - "amer.": { regionId: "us-east-1", description: "Default Americas region", pattern: "us-", multiRegion: true }, -} - -// Extract unique region IDs from REGION_INFO and create the AWS_REGIONS array -export const AWS_REGIONS = Object.values(AMAZON_BEDROCK_REGION_INFO) - // Extract all region IDs - .map((info) => ({ value: info.regionId, label: info.regionId })) - // Filter to unique region IDs (remove duplicates) - .filter((region, index, self) => index === self.findIndex((r) => r.value === region.value)) - // Sort alphabetically by region ID - .sort((a, b) => a.value.localeCompare(b.value)) diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 49f6b45278..905f34a860 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -3,15 +3,15 @@ import { convertHeadersToObject } from "./utils/headers" import { useDebounce } from "react-use" import { VSCodeLink } from "@vscode/webview-ui-toolkit/react" -import type { ProviderName, ProviderSettings } from "@roo-code/types" - import { + type ProviderName, + type ProviderSettings, openRouterDefaultModelId, requestyDefaultModelId, glamaDefaultModelId, unboundDefaultModelId, litellmDefaultModelId, -} from "@roo/api" +} from "@roo-code/types" import { vscode } from "@src/utils/vscode" import { validateApiConfiguration } from "@src/utils/validate" diff --git a/webview-ui/src/components/settings/__tests__/ApiOptions.test.tsx b/webview-ui/src/components/settings/__tests__/ApiOptions.test.tsx index ca0eb5f7fe..17421d3960 100644 --- a/webview-ui/src/components/settings/__tests__/ApiOptions.test.tsx +++ b/webview-ui/src/components/settings/__tests__/ApiOptions.test.tsx @@ -3,9 +3,7 @@ import { render, screen, fireEvent } from "@testing-library/react" import { QueryClient, QueryClientProvider } from "@tanstack/react-query" -import type { ModelInfo, ProviderSettings } from "@roo-code/types" - -import { openAiModelInfoSaneDefaults } from "@roo/api" +import { type ModelInfo, type ProviderSettings, openAiModelInfoSaneDefaults } from "@roo-code/types" import { ExtensionStateContextProvider } from "@src/context/ExtensionStateContext" diff --git a/webview-ui/src/components/settings/constants.ts b/webview-ui/src/components/settings/constants.ts index bd1ce69eb6..5b808643e5 100644 --- a/webview-ui/src/components/settings/constants.ts +++ b/webview-ui/src/components/settings/constants.ts @@ -1,6 +1,6 @@ -import type { ProviderName, ModelInfo } from "@roo-code/types" - import { + type ProviderName, + type ModelInfo, anthropicModels, bedrockModels, deepSeekModels, @@ -11,9 +11,7 @@ import { xaiModels, groqModels, chutesModels, -} from "@roo/api" - -export { AWS_REGIONS } from "@roo/aws_regions" +} from "@roo-code/types" export const MODELS_BY_PROVIDER: Partial>> = { 
anthropic: anthropicModels, @@ -50,11 +48,3 @@ export const PROVIDERS = [ { value: "chutes", label: "Chutes AI" }, { value: "litellm", label: "LiteLLM" }, ].sort((a, b) => a.label.localeCompare(b.label)) - -export const VERTEX_REGIONS = [ - { value: "us-east5", label: "us-east5" }, - { value: "us-central1", label: "us-central1" }, - { value: "europe-west1", label: "europe-west1" }, - { value: "europe-west4", label: "europe-west4" }, - { value: "asia-southeast1", label: "asia-southeast1" }, -] diff --git a/webview-ui/src/components/settings/providers/Bedrock.tsx b/webview-ui/src/components/settings/providers/Bedrock.tsx index a672ff406e..eb4000dcac 100644 --- a/webview-ui/src/components/settings/providers/Bedrock.tsx +++ b/webview-ui/src/components/settings/providers/Bedrock.tsx @@ -2,12 +2,11 @@ import { useCallback } from "react" import { Checkbox } from "vscrui" import { VSCodeTextField, VSCodeRadio, VSCodeRadioGroup } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings, ModelInfo } from "@roo-code/types" +import { type ProviderSettings, type ModelInfo, BEDROCK_REGIONS } from "@roo-code/types" import { useAppTranslation } from "@src/i18n/TranslationContext" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui" -import { AWS_REGIONS } from "../constants" import { inputEventTransform, noTransform } from "../transforms" type BedrockProps = { @@ -89,7 +88,7 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo - {AWS_REGIONS.map(({ value, label }) => ( + {BEDROCK_REGIONS.map(({ value, label }) => ( {label} diff --git a/webview-ui/src/components/settings/providers/Glama.tsx b/webview-ui/src/components/settings/providers/Glama.tsx index 933c5be1c0..85c218954a 100644 --- a/webview-ui/src/components/settings/providers/Glama.tsx +++ b/webview-ui/src/components/settings/providers/Glama.tsx @@ -1,9 +1,9 @@ import { useCallback } from "react" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings, OrganizationAllowList } from "@roo-code/types" +import { type ProviderSettings, type OrganizationAllowList, glamaDefaultModelId } from "@roo-code/types" -import { RouterModels, glamaDefaultModelId } from "@roo/api" +import type { RouterModels } from "@roo/api" import { useAppTranslation } from "@src/i18n/TranslationContext" import { getGlamaAuthUrl } from "@src/oauth/urls" diff --git a/webview-ui/src/components/settings/providers/LiteLLM.tsx b/webview-ui/src/components/settings/providers/LiteLLM.tsx index 8566348d72..6da99e9892 100644 --- a/webview-ui/src/components/settings/providers/LiteLLM.tsx +++ b/webview-ui/src/components/settings/providers/LiteLLM.tsx @@ -1,9 +1,9 @@ import { useCallback, useState, useEffect, useRef } from "react" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings, OrganizationAllowList } from "@roo-code/types" +import { type ProviderSettings, type OrganizationAllowList, litellmDefaultModelId } from "@roo-code/types" -import { litellmDefaultModelId, RouterName } from "@roo/api" +import { RouterName } from "@roo/api" import { ExtensionMessage } from "@roo/ExtensionMessage" import { vscode } from "@src/utils/vscode" diff --git a/webview-ui/src/components/settings/providers/Mistral.tsx b/webview-ui/src/components/settings/providers/Mistral.tsx index 115b0b6b80..666d1421c3 100644 --- a/webview-ui/src/components/settings/providers/Mistral.tsx +++ b/webview-ui/src/components/settings/providers/Mistral.tsx 
@@ -1,9 +1,9 @@ import { useCallback } from "react" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings } from "@roo-code/types" +import { type ProviderSettings, mistralDefaultModelId } from "@roo-code/types" -import { RouterModels, mistralDefaultModelId } from "@roo/api" +import type { RouterModels } from "@roo/api" import { useAppTranslation } from "@src/i18n/TranslationContext" import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink" diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx index a68f78a051..587608a25e 100644 --- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx +++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx @@ -3,9 +3,15 @@ import { useEvent } from "react-use" import { Checkbox } from "vscrui" import { VSCodeButton, VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings, ModelInfo, ReasoningEffort, OrganizationAllowList } from "@roo-code/types" +import { + type ProviderSettings, + type ModelInfo, + type ReasoningEffort, + type OrganizationAllowList, + azureOpenAiDefaultApiVersion, + openAiModelInfoSaneDefaults, +} from "@roo-code/types" -import { azureOpenAiDefaultApiVersion, openAiModelInfoSaneDefaults } from "@roo/api" import { ExtensionMessage } from "@roo/ExtensionMessage" import { useAppTranslation } from "@src/i18n/TranslationContext" diff --git a/webview-ui/src/components/settings/providers/OpenRouter.tsx b/webview-ui/src/components/settings/providers/OpenRouter.tsx index fb2306f6f7..e579e58fb5 100644 --- a/webview-ui/src/components/settings/providers/OpenRouter.tsx +++ b/webview-ui/src/components/settings/providers/OpenRouter.tsx @@ -4,9 +4,9 @@ import { Checkbox } from "vscrui" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" import { ExternalLinkIcon } from "@radix-ui/react-icons" -import type { ProviderSettings, OrganizationAllowList } from "@roo-code/types" +import { type ProviderSettings, type OrganizationAllowList, openRouterDefaultModelId } from "@roo-code/types" -import { RouterModels, openRouterDefaultModelId } from "@roo/api" +import type { RouterModels } from "@roo/api" import { useAppTranslation } from "@src/i18n/TranslationContext" import { getOpenRouterAuthUrl } from "@src/oauth/urls" diff --git a/webview-ui/src/components/settings/providers/Requesty.tsx b/webview-ui/src/components/settings/providers/Requesty.tsx index dd675afc06..617e401211 100644 --- a/webview-ui/src/components/settings/providers/Requesty.tsx +++ b/webview-ui/src/components/settings/providers/Requesty.tsx @@ -1,9 +1,9 @@ import { useCallback, useState } from "react" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings, OrganizationAllowList } from "@roo-code/types" +import { type ProviderSettings, type OrganizationAllowList, requestyDefaultModelId } from "@roo-code/types" -import { RouterModels, requestyDefaultModelId } from "@roo/api" +import type { RouterModels } from "@roo/api" import { vscode } from "@src/utils/vscode" import { useAppTranslation } from "@src/i18n/TranslationContext" diff --git a/webview-ui/src/components/settings/providers/Unbound.tsx b/webview-ui/src/components/settings/providers/Unbound.tsx index 9676f14796..d0a862f20c 100644 --- a/webview-ui/src/components/settings/providers/Unbound.tsx +++ b/webview-ui/src/components/settings/providers/Unbound.tsx @@ -2,9 +2,9 @@ 
import { useCallback, useState, useRef } from "react" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" import { useQueryClient } from "@tanstack/react-query" -import type { ProviderSettings, OrganizationAllowList } from "@roo-code/types" +import { type ProviderSettings, type OrganizationAllowList, unboundDefaultModelId } from "@roo-code/types" -import { RouterModels, unboundDefaultModelId } from "@roo/api" +import type { RouterModels } from "@roo/api" import { useAppTranslation } from "@src/i18n/TranslationContext" import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink" diff --git a/webview-ui/src/components/settings/providers/Vertex.tsx b/webview-ui/src/components/settings/providers/Vertex.tsx index 1bf475eb0a..19a136927a 100644 --- a/webview-ui/src/components/settings/providers/Vertex.tsx +++ b/webview-ui/src/components/settings/providers/Vertex.tsx @@ -1,13 +1,12 @@ import { useCallback } from "react" import { VSCodeLink, VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings } from "@roo-code/types" +import { type ProviderSettings, VERTEX_REGIONS } from "@roo-code/types" import { useAppTranslation } from "@src/i18n/TranslationContext" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui" import { inputEventTransform } from "../transforms" -import { VERTEX_REGIONS } from "../constants" type VertexProps = { apiConfiguration: ProviderSettings diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts index f656c702dd..9f77cbe370 100644 --- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts +++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts @@ -1,7 +1,7 @@ -import type { ProviderName, ProviderSettings, ModelInfo } from "@roo-code/types" - import { - RouterModels, + type ProviderName, + type ProviderSettings, + type ModelInfo, anthropicDefaultModelId, anthropicModels, bedrockDefaultModelId, @@ -30,7 +30,9 @@ import { glamaDefaultModelId, unboundDefaultModelId, litellmDefaultModelId, -} from "@roo/api" +} from "@roo-code/types" + +import type { RouterModels } from "@roo/api" import { useRouterModels } from "./useRouterModels" import { useOpenRouterModelProviders } from "./useOpenRouterModelProviders"
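Taken together, the webview hunks above apply one mechanical pattern: model constants and their companion types now come from @roo-code/types, while the router-model map, which remains app-side, is imported type-only from @roo/api. The Requesty hunk is representative of the resulting import shape:

// After this refactor (mirroring the Requesty.tsx hunk above):
import { type ProviderSettings, type OrganizationAllowList, requestyDefaultModelId } from "@roo-code/types"
import type { RouterModels } from "@roo/api"

Because the @roo/api import is type-only, it is erased at compile time, so the webview bundle pulls runtime values only from the shared @roo-code/types package.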