diff --git a/README.md b/README.md index a98990377..2a9b94069 100644 --- a/README.md +++ b/README.md @@ -361,7 +361,7 @@ For production workloads, use the [WhatsApp Cloud API](https://developers.facebo 3 native drivers (Anthropic, Gemini, OpenAI-compatible) route to 27 providers: -Anthropic, Gemini, OpenAI, Groq, DeepSeek, OpenRouter, Together, Mistral, Fireworks, Cohere, Perplexity, xAI, AI21, Cerebras, SambaNova, HuggingFace, Replicate, Ollama, vLLM, LM Studio, Qwen, MiniMax, Zhipu, Moonshot, Qianfan, Bedrock, and more. +Anthropic, Gemini, OpenAI, Groq, Volcengine, DeepSeek, OpenRouter, Together, Mistral, Fireworks, Cohere, Perplexity, xAI, AI21, Cerebras, SambaNova, HuggingFace, Replicate, Ollama, vLLM, LM Studio, Qwen, MiniMax, Zhipu, Moonshot, Qianfan, Bedrock, and more. Intelligent routing with task complexity scoring, automatic fallback, cost tracking, and per-model pricing. diff --git a/crates/openfang-cli/src/launcher.rs b/crates/openfang-cli/src/launcher.rs index 18a8f1236..fd8c29d5a 100644 --- a/crates/openfang-cli/src/launcher.rs +++ b/crates/openfang-cli/src/launcher.rs @@ -20,6 +20,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[ ("ANTHROPIC_API_KEY", "Anthropic"), ("OPENAI_API_KEY", "OpenAI"), ("DEEPSEEK_API_KEY", "DeepSeek"), + ("VOLCENGINE_API_KEY", "Volcano Engine"), ("GEMINI_API_KEY", "Gemini"), ("GOOGLE_API_KEY", "Gemini"), ("GROQ_API_KEY", "Groq"), diff --git a/crates/openfang-cli/src/main.rs b/crates/openfang-cli/src/main.rs index a60f4725f..2374b16e4 100644 --- a/crates/openfang-cli/src/main.rs +++ b/crates/openfang-cli/src/main.rs @@ -1463,6 +1463,18 @@ fn provider_list() -> Vec<(&'static str, &'static str, &'static str, &'static st ("groq", "GROQ_API_KEY", "llama-3.3-70b-versatile", "Groq"), ("gemini", "GEMINI_API_KEY", "gemini-2.5-flash", "Gemini"), ("deepseek", "DEEPSEEK_API_KEY", "deepseek-chat", "DeepSeek"), + ( + "volcengine_coding", + "VOLCENGINE_API_KEY", + "ark-code-latest", + "Volcano Engine Coding Plan", + ), + ( + "volcengine", + "VOLCENGINE_API_KEY", + "doubao-seed-1-6-251015", + "Volcano Engine", + ), ( "anthropic", "ANTHROPIC_API_KEY", @@ -4774,6 +4786,7 @@ fn provider_to_env_var(provider: &str) -> String { "perplexity" => "PERPLEXITY_API_KEY".to_string(), "cohere" => "COHERE_API_KEY".to_string(), "xai" => "XAI_API_KEY".to_string(), + "volcengine" | "doubao" | "volcengine_coding" => "VOLCENGINE_API_KEY".to_string(), "brave" => "BRAVE_API_KEY".to_string(), "tavily" => "TAVILY_API_KEY".to_string(), other => format!("{}_API_KEY", other.to_uppercase()), @@ -4825,6 +4838,15 @@ pub(crate) fn test_api_key(provider: &str, env_var: &str) -> bool { .get("https://openrouter.ai/api/v1/models") .bearer_auth(&key) .send(), + "volcengine" | "doubao" => { + let base = openfang_types::model_catalog::VOLCENGINE_BASE_URL.trim_end_matches('/'); + client.get(format!("{base}/models")).bearer_auth(&key).send() + } + "volcengine_coding" => { + let base = openfang_types::model_catalog::VOLCENGINE_CODING_BASE_URL + .trim_end_matches('/'); + client.get(format!("{base}/models")).bearer_auth(&key).send() + } // Bedrock bearer tokens are only valid against bedrock-runtime, not the // management plane. There is no cheap region-agnostic probe, so skip. "bedrock" => return true, diff --git a/crates/openfang-cli/src/tui/screens/init_wizard.rs b/crates/openfang-cli/src/tui/screens/init_wizard.rs index 7f256082e..1dd19571f 100644 --- a/crates/openfang-cli/src/tui/screens/init_wizard.rs +++ b/crates/openfang-cli/src/tui/screens/init_wizard.rs @@ -68,6 +68,22 @@ const PROVIDERS: &[ProviderInfo] = &[ needs_key: true, hint: "", }, + ProviderInfo { + name: "volcengine_coding", + display: "Volcano Engine Coding Plan", + env_var: "VOLCENGINE_API_KEY", + default_model: "ark-code-latest", + needs_key: true, + hint: "", + }, + ProviderInfo { + name: "volcengine", + display: "Volcano Engine", + env_var: "VOLCENGINE_API_KEY", + default_model: "doubao-seed-1-6-251015", + needs_key: true, + hint: "", + }, ProviderInfo { name: "openrouter", display: "OpenRouter", @@ -328,6 +344,18 @@ struct ModelEntry { cost: String, } +fn model_cost_label(provider: &str, input_cost_per_m: f64, output_cost_per_m: f64) -> String { + if input_cost_per_m == 0.0 && output_cost_per_m == 0.0 { + if provider == "volcengine_coding" { + "see provider pricing".to_string() + } else { + "free".to_string() + } + } else { + format!("${:.2}/${:.2}", input_cost_per_m, output_cost_per_m) + } +} + const ROUTING_TIER_NAMES: [&str; 3] = ["Fast", "Balanced", "Frontier"]; const ROUTING_TIER_DESC: [&str; 3] = [ "quick lookups, greetings, simple Q&A", @@ -515,11 +543,7 @@ impl State { for (i, m) in models.iter().enumerate() { let tier = tier_label(m.tier); - let cost = if m.input_cost_per_m == 0.0 && m.output_cost_per_m == 0.0 { - "free".to_string() - } else { - format!("${:.2}/${:.2}", m.input_cost_per_m, m.output_cost_per_m) - }; + let cost = model_cost_label(p.name, m.input_cost_per_m, m.output_cost_per_m); if m.id == p.default_model { default_idx = i; @@ -1200,6 +1224,24 @@ fn handle_migration_key( } } +#[cfg(test)] +mod tests { + use super::model_cost_label; + + #[test] + fn volcengine_coding_zero_cost_models_do_not_render_as_free() { + assert_eq!( + model_cost_label("volcengine_coding", 0.0, 0.0), + "see provider pricing" + ); + } + + #[test] + fn zero_cost_models_for_other_providers_still_render_as_free() { + assert_eq!(model_cost_label("ollama", 0.0, 0.0), "free"); + } +} + // ── Routing step key handler ─────────────────────────────────────────────── fn handle_routing_key(state: &mut State, code: KeyCode) { diff --git a/crates/openfang-cli/src/tui/screens/welcome.rs b/crates/openfang-cli/src/tui/screens/welcome.rs index 768a51ca4..c0c1f9fc5 100644 --- a/crates/openfang-cli/src/tui/screens/welcome.rs +++ b/crates/openfang-cli/src/tui/screens/welcome.rs @@ -32,6 +32,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[ ("ANTHROPIC_API_KEY", "Anthropic"), ("OPENAI_API_KEY", "OpenAI"), ("DEEPSEEK_API_KEY", "DeepSeek"), + ("VOLCENGINE_API_KEY", "Volcano Engine"), ("GEMINI_API_KEY", "Gemini"), ("GOOGLE_API_KEY", "Gemini"), ("GROQ_API_KEY", "Groq"), diff --git a/crates/openfang-cli/src/tui/screens/wizard.rs b/crates/openfang-cli/src/tui/screens/wizard.rs index f15b8f8c8..9fd5570f1 100644 --- a/crates/openfang-cli/src/tui/screens/wizard.rs +++ b/crates/openfang-cli/src/tui/screens/wizard.rs @@ -85,6 +85,18 @@ const PROVIDERS: &[ProviderInfo] = &[ default_model: "qwen-plus", needs_key: true, }, + ProviderInfo { + name: "volcengine_coding", + env_var: "VOLCENGINE_API_KEY", + default_model: "ark-code-latest", + needs_key: true, + }, + ProviderInfo { + name: "volcengine", + env_var: "VOLCENGINE_API_KEY", + default_model: "doubao-seed-1-6-251015", + needs_key: true, + }, ProviderInfo { name: "perplexity", env_var: "PERPLEXITY_API_KEY", diff --git a/crates/openfang-kernel/src/kernel.rs b/crates/openfang-kernel/src/kernel.rs index 7a39e7baf..2ba815651 100644 --- a/crates/openfang-kernel/src/kernel.rs +++ b/crates/openfang-kernel/src/kernel.rs @@ -1884,8 +1884,11 @@ impl OpenFangKernel { // Look up model's actual context window from the catalog let ctx_window = self.model_catalog.read().ok().and_then(|cat| { - cat.find_model(&entry.manifest.model.model) - .map(|m| m.context_window as usize) + cat.find_model_for_provider( + &entry.manifest.model.model, + &entry.manifest.model.provider, + ) + .map(|m| m.context_window as usize) }); let (tx, rx) = tokio::sync::mpsc::channel::(64); @@ -2174,12 +2177,14 @@ impl OpenFangKernel { // Persist usage to database (same as non-streaming path) let model = &manifest.model.model; - let cost = MeteringEngine::estimate_cost_with_catalog( + let provider = &manifest.model.provider; + let cost = MeteringEngine::estimate_cost_with_catalog_for_provider( &kernel_clone .model_catalog .read() .unwrap_or_else(|e| e.into_inner()), model, + provider, result.total_usage.input_tokens, result.total_usage.output_tokens, ); @@ -2663,7 +2668,7 @@ impl OpenFangKernel { // Look up model's actual context window from the catalog let ctx_window = self.model_catalog.read().ok().and_then(|cat| { - cat.find_model(&manifest.model.model) + cat.find_model_for_provider(&manifest.model.model, &manifest.model.provider) .map(|m| m.context_window as usize) }); @@ -2735,9 +2740,11 @@ impl OpenFangKernel { // Record usage in the metering engine (uses catalog pricing as single source of truth) let model = &manifest.model.model; - let cost = MeteringEngine::estimate_cost_with_catalog( + let provider = &manifest.model.provider; + let cost = MeteringEngine::estimate_cost_with_catalog_for_provider( &self.model_catalog.read().unwrap_or_else(|e| e.into_inner()), model, + provider, result.total_usage.input_tokens, result.total_usage.output_tokens, ); @@ -3258,9 +3265,11 @@ impl OpenFangKernel { .unwrap_or((0, 0)); let model = &entry.manifest.model.model; - let cost = MeteringEngine::estimate_cost_with_catalog( + let provider = &entry.manifest.model.provider; + let cost = MeteringEngine::estimate_cost_with_catalog_for_provider( &self.model_catalog.read().unwrap_or_else(|e| e.into_inner()), model, + provider, input_tokens, output_tokens, ); diff --git a/crates/openfang-kernel/src/metering.rs b/crates/openfang-kernel/src/metering.rs index e34dff4e9..36c9c041a 100644 --- a/crates/openfang-kernel/src/metering.rs +++ b/crates/openfang-kernel/src/metering.rs @@ -206,6 +206,25 @@ impl MeteringEngine { input_cost + output_cost } + /// Like `estimate_cost_with_catalog` but scopes the catalog lookup to `provider`. + /// + /// Prevents cross-provider pricing errors when multiple providers share the same + /// short model name (e.g. `minimax-m2.5` exists under both MiniMax and Volcengine). + pub fn estimate_cost_with_catalog_for_provider( + catalog: &openfang_runtime::model_catalog::ModelCatalog, + model: &str, + provider: &str, + input_tokens: u64, + output_tokens: u64, + ) -> f64 { + let (input_per_m, output_per_m) = catalog + .pricing_for_provider(model, provider) + .unwrap_or((1.0, 3.0)); + let input_cost = (input_tokens as f64 / 1_000_000.0) * input_per_m; + let output_cost = (output_tokens as f64 / 1_000_000.0) * output_per_m; + input_cost + output_cost + } + /// Clean up old usage records. pub fn cleanup(&self, days: u32) -> OpenFangResult { self.store.cleanup_old(days) diff --git a/crates/openfang-runtime/src/drivers/mod.rs b/crates/openfang-runtime/src/drivers/mod.rs index 50359216f..08ff71d33 100644 --- a/crates/openfang-runtime/src/drivers/mod.rs +++ b/crates/openfang-runtime/src/drivers/mod.rs @@ -522,6 +522,7 @@ pub fn detect_available_provider() -> Option<(&'static str, &'static str, &'stat ("gemini", "gemini-2.5-flash", "GEMINI_API_KEY"), ("groq", "llama-3.3-70b-versatile", "GROQ_API_KEY"), ("deepseek", "deepseek-chat", "DEEPSEEK_API_KEY"), + ("volcengine_coding", "ark-code-latest", "VOLCENGINE_API_KEY"), ( "openrouter", "openrouter/google/gemini-2.5-flash", @@ -600,6 +601,7 @@ pub fn known_providers() -> &'static [&'static str] { "kimi_coding", "qianfan", "volcengine", + "volcengine_coding", "chutes", "venice", "nvidia", @@ -706,6 +708,7 @@ mod tests { assert!(providers.contains(&"kimi_coding")); assert!(providers.contains(&"qianfan")); assert!(providers.contains(&"volcengine")); + assert!(providers.contains(&"volcengine_coding")); assert!(providers.contains(&"chutes")); assert!(providers.contains(&"nvidia")); assert!(providers.contains(&"novita")); diff --git a/crates/openfang-runtime/src/model_catalog.rs b/crates/openfang-runtime/src/model_catalog.rs index efe7b735d..e3791eb6a 100644 --- a/crates/openfang-runtime/src/model_catalog.rs +++ b/crates/openfang-runtime/src/model_catalog.rs @@ -226,6 +226,26 @@ impl ModelCatalog { return Some(entry); } + // Try provider-prefixed ID form (e.g. caller passes "minimax-m2.5" but + // the catalog entry carries the globally-unique id "volcengine_coding/minimax-m2.5"). + let prefixed = format!("{}/{}", provider, id_or_alias); + let prefixed_lower = prefixed.to_lowercase(); + let mut prefixed_ci: Option<&ModelCatalogEntry> = None; + for m in &self.models { + if m.provider != provider { + continue; + } + if m.id == prefixed { + return Some(m); + } + if m.id.to_lowercase() == prefixed_lower && prefixed_ci.is_none() { + prefixed_ci = Some(m); + } + } + if let Some(entry) = prefixed_ci { + return Some(entry); + } + // Display-name match scoped to provider if let Some(entry) = self .models @@ -309,6 +329,16 @@ impl ModelCatalog { .map(|m| (m.input_cost_per_m, m.output_cost_per_m)) } + /// Look up pricing for a model scoped to a specific provider. + /// + /// Delegates to `find_model_for_provider` so provider-prefixed IDs + /// (e.g. `volcengine_coding/minimax-m2.5`) are resolved correctly when + /// the caller only has the short API name (`minimax-m2.5`). + pub fn pricing_for_provider(&self, model_id: &str, provider: &str) -> Option<(f64, f64)> { + self.find_model_for_provider(model_id, provider) + .map(|m| (m.input_cost_per_m, m.output_cost_per_m)) + } + /// List all alias mappings. pub fn list_aliases(&self) -> &HashMap { &self.aliases @@ -865,7 +895,7 @@ fn builtin_providers() -> Vec { // ── Volcano Engine (Doubao) ────────────────────────────────── ProviderInfo { id: "volcengine".into(), - display_name: "Volcano Engine (Doubao)".into(), + display_name: "Volcano Engine".into(), api_key_env: "VOLCENGINE_API_KEY".into(), base_url: VOLCENGINE_BASE_URL.into(), key_required: true, @@ -3449,6 +3479,136 @@ fn builtin_models() -> Vec { supports_streaming: true, aliases: vec![], }, + // ══════════════════════════════════════════════════════════════ + // Volcano Engine Coding Plan (9) + // ══════════════════════════════════════════════════════════════ + ModelCatalogEntry { + id: "ark-code-latest".into(), + display_name: "ark-code-latest".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 131_072, + max_output_tokens: 8_192, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec!["ark-code".into()], + }, + ModelCatalogEntry { + id: "volcengine_coding/doubao-seed-2.0-code".into(), + display_name: "Doubao Seed 2.0 Code".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/doubao-seed-2.0-pro".into(), + display_name: "Doubao Seed 2.0 Pro".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Frontier, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/doubao-seed-2.0-lite".into(), + display_name: "Doubao Seed 2.0 Lite".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Fast, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/doubao-seed-code".into(), + display_name: "Doubao Seed Code".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/minimax-m2.5".into(), + display_name: "MiniMax M2.5".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 200_000, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/glm-4.7".into(), + display_name: "GLM 4.7".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Balanced, + context_window: 200_000, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/deepseek-v3.2".into(), + display_name: "DeepSeek V3.2".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 131_072, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "volcengine_coding/kimi-k2.5".into(), + display_name: "Kimi K2.5".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ══════════════════════════════════════════════════════════════ // Volcano Engine / Doubao (4) // ══════════════════════════════════════════════════════════════