Skip to content
Open
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ For production workloads, use the [WhatsApp Cloud API](https://developers.facebo

3 native drivers (Anthropic, Gemini, OpenAI-compatible) route to 27 providers:

Anthropic, Gemini, OpenAI, Groq, DeepSeek, OpenRouter, Together, Mistral, Fireworks, Cohere, Perplexity, xAI, AI21, Cerebras, SambaNova, HuggingFace, Replicate, Ollama, vLLM, LM Studio, Qwen, MiniMax, Zhipu, Moonshot, Qianfan, Bedrock, and more.
Anthropic, Gemini, OpenAI, Groq, Volcengine, DeepSeek, OpenRouter, Together, Mistral, Fireworks, Cohere, Perplexity, xAI, AI21, Cerebras, SambaNova, HuggingFace, Replicate, Ollama, vLLM, LM Studio, Qwen, MiniMax, Zhipu, Moonshot, Qianfan, Bedrock, and more.

Intelligent routing with task complexity scoring, automatic fallback, cost tracking, and per-model pricing.

Expand Down
1 change: 1 addition & 0 deletions crates/openfang-cli/src/launcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[
("ANTHROPIC_API_KEY", "Anthropic"),
("OPENAI_API_KEY", "OpenAI"),
("DEEPSEEK_API_KEY", "DeepSeek"),
("VOLCENGINE_API_KEY", "Volcano Engine"),
("GEMINI_API_KEY", "Gemini"),
("GOOGLE_API_KEY", "Gemini"),
("GROQ_API_KEY", "Groq"),
Expand Down
22 changes: 22 additions & 0 deletions crates/openfang-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,18 @@ fn provider_list() -> Vec<(&'static str, &'static str, &'static str, &'static st
("groq", "GROQ_API_KEY", "llama-3.3-70b-versatile", "Groq"),
("gemini", "GEMINI_API_KEY", "gemini-2.5-flash", "Gemini"),
("deepseek", "DEEPSEEK_API_KEY", "deepseek-chat", "DeepSeek"),
(
"volcengine_coding",
"VOLCENGINE_API_KEY",
"ark-code-latest",
"Volcano Engine Coding Plan",
),
(
"volcengine",
"VOLCENGINE_API_KEY",
"doubao-seed-1-6-251015",
"Volcano Engine",
),
(
"anthropic",
"ANTHROPIC_API_KEY",
Expand Down Expand Up @@ -4774,6 +4786,7 @@ fn provider_to_env_var(provider: &str) -> String {
"perplexity" => "PERPLEXITY_API_KEY".to_string(),
"cohere" => "COHERE_API_KEY".to_string(),
"xai" => "XAI_API_KEY".to_string(),
"volcengine" | "doubao" | "volcengine_coding" => "VOLCENGINE_API_KEY".to_string(),
"brave" => "BRAVE_API_KEY".to_string(),
"tavily" => "TAVILY_API_KEY".to_string(),
other => format!("{}_API_KEY", other.to_uppercase()),
Expand Down Expand Up @@ -4825,6 +4838,15 @@ pub(crate) fn test_api_key(provider: &str, env_var: &str) -> bool {
.get("https://openrouter.ai/api/v1/models")
.bearer_auth(&key)
.send(),
"volcengine" | "doubao" => {
let base = openfang_types::model_catalog::VOLCENGINE_BASE_URL.trim_end_matches('/');
client.get(format!("{base}/models")).bearer_auth(&key).send()
}
"volcengine_coding" => {
let base = openfang_types::model_catalog::VOLCENGINE_CODING_BASE_URL
.trim_end_matches('/');
client.get(format!("{base}/models")).bearer_auth(&key).send()
}
// Bedrock bearer tokens are only valid against bedrock-runtime, not the
// management plane. There is no cheap region-agnostic probe, so skip.
"bedrock" => return true,
Expand Down
52 changes: 47 additions & 5 deletions crates/openfang-cli/src/tui/screens/init_wizard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,22 @@ const PROVIDERS: &[ProviderInfo] = &[
needs_key: true,
hint: "",
},
ProviderInfo {
name: "volcengine_coding",
display: "Volcano Engine Coding Plan",
env_var: "VOLCENGINE_API_KEY",
default_model: "ark-code-latest",
needs_key: true,
hint: "",
},
ProviderInfo {
name: "volcengine",
display: "Volcano Engine",
env_var: "VOLCENGINE_API_KEY",
default_model: "doubao-seed-1-6-251015",
needs_key: true,
hint: "",
},
ProviderInfo {
name: "openrouter",
display: "OpenRouter",
Expand Down Expand Up @@ -328,6 +344,18 @@ struct ModelEntry {
cost: String,
}

/// Builds the text stored as a model's cost label.
///
/// * Any non-zero input or output price yields `"$<input>/$<output>"`, each
///   value printed with two decimal places.
/// * When both prices are exactly zero the label is `"free"`, except for the
///   `volcengine_coding` provider, which gets `"see provider pricing"`.
///   NOTE(review): presumably that provider is billed outside the per-token
///   catalog prices, so a zero entry does not mean it is free — confirm.
fn model_cost_label(provider: &str, input_cost_per_m: f64, output_cost_per_m: f64) -> String {
    // Guard clause: a listed price on either side always wins.
    let has_listed_price = input_cost_per_m != 0.0 || output_cost_per_m != 0.0;
    if has_listed_price {
        return format!("${:.2}/${:.2}", input_cost_per_m, output_cost_per_m);
    }

    // Both prices are zero from here on; only the wording depends on the provider.
    let zero_price_text = match provider {
        "volcengine_coding" => "see provider pricing",
        _ => "free",
    };
    zero_price_text.to_string()
}

const ROUTING_TIER_NAMES: [&str; 3] = ["Fast", "Balanced", "Frontier"];
const ROUTING_TIER_DESC: [&str; 3] = [
"quick lookups, greetings, simple Q&A",
Expand Down Expand Up @@ -515,11 +543,7 @@ impl State {

for (i, m) in models.iter().enumerate() {
let tier = tier_label(m.tier);
let cost = if m.input_cost_per_m == 0.0 && m.output_cost_per_m == 0.0 {
"free".to_string()
} else {
format!("${:.2}/${:.2}", m.input_cost_per_m, m.output_cost_per_m)
};
let cost = model_cost_label(p.name, m.input_cost_per_m, m.output_cost_per_m);

if m.id == p.default_model {
default_idx = i;
Expand Down Expand Up @@ -1200,6 +1224,24 @@ fn handle_migration_key(
}
}

#[cfg(test)]
mod tests {
    use super::model_cost_label;

    /// A zero/zero price for `volcengine_coding` must not be labelled "free".
    #[test]
    fn volcengine_coding_zero_cost_models_do_not_render_as_free() {
        let label = model_cost_label("volcengine_coding", 0.0, 0.0);
        assert_eq!(label, "see provider pricing");
    }

    /// Every other provider keeps the "free" label for a zero/zero price.
    #[test]
    fn zero_cost_models_for_other_providers_still_render_as_free() {
        let label = model_cost_label("ollama", 0.0, 0.0);
        assert_eq!(label, "free");
    }
}

// ── Routing step key handler ───────────────────────────────────────────────

fn handle_routing_key(state: &mut State, code: KeyCode) {
Expand Down
1 change: 1 addition & 0 deletions crates/openfang-cli/src/tui/screens/welcome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[
("ANTHROPIC_API_KEY", "Anthropic"),
("OPENAI_API_KEY", "OpenAI"),
("DEEPSEEK_API_KEY", "DeepSeek"),
("VOLCENGINE_API_KEY", "Volcano Engine"),
("GEMINI_API_KEY", "Gemini"),
("GOOGLE_API_KEY", "Gemini"),
("GROQ_API_KEY", "Groq"),
Expand Down
12 changes: 12 additions & 0 deletions crates/openfang-cli/src/tui/screens/wizard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,18 @@ const PROVIDERS: &[ProviderInfo] = &[
default_model: "qwen-plus",
needs_key: true,
},
ProviderInfo {
name: "volcengine_coding",
env_var: "VOLCENGINE_API_KEY",
default_model: "ark-code-latest",
needs_key: true,
},
ProviderInfo {
name: "volcengine",
env_var: "VOLCENGINE_API_KEY",
default_model: "doubao-seed-1-6-251015",
needs_key: true,
},
ProviderInfo {
name: "perplexity",
env_var: "PERPLEXITY_API_KEY",
Expand Down
21 changes: 15 additions & 6 deletions crates/openfang-kernel/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1884,8 +1884,11 @@ impl OpenFangKernel {

// Look up model's actual context window from the catalog
let ctx_window = self.model_catalog.read().ok().and_then(|cat| {
cat.find_model(&entry.manifest.model.model)
.map(|m| m.context_window as usize)
cat.find_model_for_provider(
&entry.manifest.model.model,
&entry.manifest.model.provider,
)
.map(|m| m.context_window as usize)
});

let (tx, rx) = tokio::sync::mpsc::channel::<StreamEvent>(64);
Expand Down Expand Up @@ -2174,12 +2177,14 @@ impl OpenFangKernel {

// Persist usage to database (same as non-streaming path)
let model = &manifest.model.model;
let cost = MeteringEngine::estimate_cost_with_catalog(
let provider = &manifest.model.provider;
let cost = MeteringEngine::estimate_cost_with_catalog_for_provider(
&kernel_clone
.model_catalog
.read()
.unwrap_or_else(|e| e.into_inner()),
model,
provider,
result.total_usage.input_tokens,
result.total_usage.output_tokens,
);
Expand Down Expand Up @@ -2663,7 +2668,7 @@ impl OpenFangKernel {

// Look up model's actual context window from the catalog
let ctx_window = self.model_catalog.read().ok().and_then(|cat| {
cat.find_model(&manifest.model.model)
cat.find_model_for_provider(&manifest.model.model, &manifest.model.provider)
.map(|m| m.context_window as usize)
});

Expand Down Expand Up @@ -2735,9 +2740,11 @@ impl OpenFangKernel {

// Record usage in the metering engine (uses catalog pricing as single source of truth)
let model = &manifest.model.model;
let cost = MeteringEngine::estimate_cost_with_catalog(
let provider = &manifest.model.provider;
let cost = MeteringEngine::estimate_cost_with_catalog_for_provider(
&self.model_catalog.read().unwrap_or_else(|e| e.into_inner()),
model,
provider,
result.total_usage.input_tokens,
result.total_usage.output_tokens,
);
Expand Down Expand Up @@ -3258,9 +3265,11 @@ impl OpenFangKernel {
.unwrap_or((0, 0));

let model = &entry.manifest.model.model;
let cost = MeteringEngine::estimate_cost_with_catalog(
let provider = &entry.manifest.model.provider;
let cost = MeteringEngine::estimate_cost_with_catalog_for_provider(
&self.model_catalog.read().unwrap_or_else(|e| e.into_inner()),
model,
provider,
input_tokens,
output_tokens,
);
Expand Down
19 changes: 19 additions & 0 deletions crates/openfang-kernel/src/metering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,25 @@ impl MeteringEngine {
input_cost + output_cost
}

/// Provider-scoped variant of `estimate_cost_with_catalog`.
///
/// The catalog is queried with both `model` and `provider`, which prevents
/// cross-provider pricing errors when multiple providers share the same short
/// model name (e.g. `minimax-m2.5` exists under both MiniMax and Volcengine).
///
/// Rates are expressed per one million tokens. If the catalog yields no
/// pricing for the `(model, provider)` pair, the fallback rates `1.0` (input)
/// and `3.0` (output) are used.
///
/// Returns the sum of the input-token cost and the output-token cost.
pub fn estimate_cost_with_catalog_for_provider(
    catalog: &openfang_runtime::model_catalog::ModelCatalog,
    model: &str,
    provider: &str,
    input_tokens: u64,
    output_tokens: u64,
) -> f64 {
    const TOKENS_PER_MILLION: f64 = 1_000_000.0;

    // Scale the token count down to millions first, then apply the rate.
    let cost_of = |tokens: u64, rate_per_m: f64| (tokens as f64 / TOKENS_PER_MILLION) * rate_per_m;

    let (input_rate, output_rate) = catalog
        .pricing_for_provider(model, provider)
        .unwrap_or((1.0, 3.0));

    cost_of(input_tokens, input_rate) + cost_of(output_tokens, output_rate)
}

/// Clean up old usage records.
pub fn cleanup(&self, days: u32) -> OpenFangResult<usize> {
self.store.cleanup_old(days)
Expand Down
3 changes: 3 additions & 0 deletions crates/openfang-runtime/src/drivers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ pub fn detect_available_provider() -> Option<(&'static str, &'static str, &'stat
("gemini", "gemini-2.5-flash", "GEMINI_API_KEY"),
("groq", "llama-3.3-70b-versatile", "GROQ_API_KEY"),
("deepseek", "deepseek-chat", "DEEPSEEK_API_KEY"),
("volcengine_coding", "ark-code-latest", "VOLCENGINE_API_KEY"),
(
"openrouter",
"openrouter/google/gemini-2.5-flash",
Expand Down Expand Up @@ -600,6 +601,7 @@ pub fn known_providers() -> &'static [&'static str] {
"kimi_coding",
"qianfan",
"volcengine",
"volcengine_coding",
"chutes",
"venice",
"nvidia",
Expand Down Expand Up @@ -706,6 +708,7 @@ mod tests {
assert!(providers.contains(&"kimi_coding"));
assert!(providers.contains(&"qianfan"));
assert!(providers.contains(&"volcengine"));
assert!(providers.contains(&"volcengine_coding"));
assert!(providers.contains(&"chutes"));
assert!(providers.contains(&"nvidia"));
assert!(providers.contains(&"novita"));
Expand Down
Loading