
Commit c7adb2f (parent 6fe99d7)

fix(models): Update available models documentation and add new Arcee AI models with detailed descriptions

File tree: 2 files changed, +275 −4 lines changed

docs/guides/available-models.md

Lines changed: 15 additions & 3 deletions
@@ -46,10 +46,10 @@ For more information about working with models, see the [Working with Models](/g
 ## Models by Type
 {: .d-inline-block }
 
-Last updated: 2025-05-05
+Last updated: 2025-05-06
 {: .label .label-green }
 
-### Chat Models (458)
+### Chat Models (464)
 
 | ID | Type | Name | Provider | Context | MaxTok | Family | In$/M | Out$/M |
 | :-- | :-- | :-- | :-- | --: | --: | :-- | --: | --: |
@@ -238,6 +238,12 @@ Last updated: 2025-05-05
 | anthropic/claude-3.7-sonnet:beta | chat | Anthropic: Claude 3.7 Sonnet (self-moderated) | openrouter | 200000 | 128000 | other | 3.00 | 15.00 |
 | anthropic/claude-3.7-sonnet:thinking | chat | Anthropic: Claude 3.7 Sonnet (thinking) | openrouter | 200000 | 64000 | other | 3.00 | 15.00 |
 | arcee-ai/arcee-blitz | chat | Arcee AI: Arcee Blitz | openrouter | 32768 | | other | 0.45 | 0.75 |
+| arcee-ai/caller-large | chat | Arcee AI: Caller Large | openrouter | 32768 | | other | 0.55 | 0.85 |
+| arcee-ai/coder-large | chat | Arcee AI: Coder Large | openrouter | 32768 | | other | 0.50 | 0.80 |
+| arcee-ai/maestro-reasoning | chat | Arcee AI: Maestro Reasoning | openrouter | 131072 | | other | 0.90 | 3.30 |
+| arcee-ai/spotlight | chat | Arcee AI: Spotlight | openrouter | 131072 | | other | 0.18 | 0.18 |
+| arcee-ai/virtuoso-large | chat | Arcee AI: Virtuoso Large | openrouter | 131072 | | other | 0.75 | 1.20 |
+| arcee-ai/virtuoso-medium-v2 | chat | Arcee AI: Virtuoso Medium V2 | openrouter | 131072 | | other | 0.50 | 0.80 |
 | arliai/qwq-32b-arliai-rpr-v1:free | chat | ArliAI: QwQ 32B RpR v1 (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
 | bytedance-research/ui-tars-72b:free | chat | Bytedance: UI-TARS 72B (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
 | cognitivecomputations/dolphin-mixtral-8x22b | chat | Dolphin 2.9.2 Mixtral 8x22B 🐬 | openrouter | 16000 | | other | 0.90 | 0.90 |
@@ -740,7 +746,7 @@ Last updated: 2025-05-05
 | anthropic.claude-v2:1:200k | chat | Claude | bedrock | 200000 | 4096 | claude2 | 8.00 | 24.00 |
 | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | chat | Claude 3.7 Sonnet | bedrock | 200000 | 4096 | claude3_sonnet | 3.00 | 15.00 |
 
-### Openrouter Models (319)
+### Openrouter Models (325)
 
 | ID | Type | Name | Provider | Context | MaxTok | Family | In$/M | Out$/M |
 | :-- | :-- | :-- | :-- | --: | --: | :-- | --: | --: |
@@ -790,6 +796,12 @@ Last updated: 2025-05-05
 | anthropic/claude-3.7-sonnet:beta | chat | Anthropic: Claude 3.7 Sonnet (self-moderated) | openrouter | 200000 | 128000 | other | 3.00 | 15.00 |
 | anthropic/claude-3.7-sonnet:thinking | chat | Anthropic: Claude 3.7 Sonnet (thinking) | openrouter | 200000 | 64000 | other | 3.00 | 15.00 |
 | arcee-ai/arcee-blitz | chat | Arcee AI: Arcee Blitz | openrouter | 32768 | | other | 0.45 | 0.75 |
+| arcee-ai/caller-large | chat | Arcee AI: Caller Large | openrouter | 32768 | | other | 0.55 | 0.85 |
+| arcee-ai/coder-large | chat | Arcee AI: Coder Large | openrouter | 32768 | | other | 0.50 | 0.80 |
+| arcee-ai/maestro-reasoning | chat | Arcee AI: Maestro Reasoning | openrouter | 131072 | | other | 0.90 | 3.30 |
+| arcee-ai/spotlight | chat | Arcee AI: Spotlight | openrouter | 131072 | | other | 0.18 | 0.18 |
+| arcee-ai/virtuoso-large | chat | Arcee AI: Virtuoso Large | openrouter | 131072 | | other | 0.75 | 1.20 |
+| arcee-ai/virtuoso-medium-v2 | chat | Arcee AI: Virtuoso Medium V2 | openrouter | 131072 | | other | 0.50 | 0.80 |
 | arliai/qwq-32b-arliai-rpr-v1:free | chat | ArliAI: QwQ 32B RpR v1 (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
 | bytedance-research/ui-tars-72b:free | chat | Bytedance: UI-TARS 72B (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
 | cognitivecomputations/dolphin-mixtral-8x22b | chat | Dolphin 2.9.2 Mixtral 8x22B 🐬 | openrouter | 16000 | | other | 0.90 | 0.90 |
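The In$/M and Out$/M columns in the tables above are USD per million input and output tokens. A minimal Ruby sketch of estimating a single request's cost from those columns; the token counts are hypothetical, chosen only to illustrate the arithmetic:

```ruby
# Prices are USD per million tokens, taken from the table rows above.
INPUT_PER_M  = 0.55 # arcee-ai/caller-large, In$/M
OUTPUT_PER_M = 0.85 # arcee-ai/caller-large, Out$/M

# Estimated USD cost of one request, given token counts and per-million prices.
def request_cost(input_tokens, output_tokens, in_per_m, out_per_m)
  (input_tokens * in_per_m + output_tokens * out_per_m) / 1_000_000.0
end

cost = request_cost(1_000, 200, INPUT_PER_M, OUTPUT_PER_M)
# (1_000 * 0.55 + 200 * 0.85) / 1e6 => $0.00072
```

For example, a 1,000-token prompt with a 200-token completion against arcee-ai/caller-large comes to well under a tenth of a cent.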

lib/ruby_llm/models.json

Lines changed: 260 additions & 1 deletion
@@ -5634,7 +5634,7 @@
     "output_price_per_million": 0.75,
     "metadata": {
       "object": null,
-      "description": "Arcee-Blitz (24B) is a 24B-parameter instruct model built on Mistral-Small-24B and distilled from DeepSeek-V3. It emphasizes speed and practicality, with strong performance across general tasks and notable gains in world knowledge benchmarks. The model uses DeepSeek logits as pretraining targets and underwent additional fine-tuning to improve instruction-following and reasoning capabilities.\n\nBenchmarks show consistent improvements over Mistral-Small-3, particularly on MMLU-Pro, Math, and MixEval. Arcee-Blitz supports a 32k context length and is released under the Apache 2.0 license.",
+      "description": "Arcee Blitz is a 24 B‑parameter dense model distilled from DeepSeek and built on Mistral architecture for \"everyday\" chat. The distillation‑plus‑refinement pipeline trims compute while keeping DeepSeek‑style reasoning, so Blitz punches above its weight on MMLU, GSM‑8K and BBH compared with other mid‑size open models. With a default 128 k context window and competitive throughput, it serves as a cost‑efficient workhorse for summarization, brainstorming and light code help. Internally, Arcee uses Blitz as the default writer in Conductor pipelines when the heavier Virtuoso line is not required. Users therefore get near‑70 B quality at ~⅓ the latency and price. ",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -5661,6 +5661,265 @@
       }
     }
   },
+  {
+    "id": "arcee-ai/caller-large",
+    "created_at": "2025-05-06T01:31:09+02:00",
+    "display_name": "Arcee AI: Caller Large",
+    "provider": "openrouter",
+    "context_window": 32768,
+    "max_tokens": null,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.55,
+    "output_price_per_million": 0.85,
+    "metadata": {
+      "object": null,
+      "description": "Caller Large is Arcee's specialist \"function‑calling\" SLM built to orchestrate external tools and APIs. Instead of maximizing next‑token accuracy, training focuses on structured JSON outputs, parameter extraction and multi‑step tool chains, making Caller a natural choice for retrieval‑augmented generation, robotic process automation or data‑pull chatbots. It incorporates a routing head that decides when (and how) to invoke a tool versus answering directly, reducing hallucinated calls. The model is already the backbone of Arcee Conductor's auto‑tool mode, where it parses user intent, emits clean function signatures and hands control back once the tool response is ready. Developers thus gain an OpenAI‑style function‑calling UX without handing requests to a frontier‑scale model. ",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.00000055",
+        "completion": "0.00000085",
+        "request": "0",
+        "image": "0",
+        "web_search": "0",
+        "internal_reasoning": "0"
+      },
+      "top_provider": {
+        "context_length": 32768,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      }
+    }
+  },
+  {
+    "id": "arcee-ai/coder-large",
+    "created_at": "2025-05-05T22:57:43+02:00",
+    "display_name": "Arcee AI: Coder Large",
+    "provider": "openrouter",
+    "context_window": 32768,
+    "max_tokens": null,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.5,
+    "output_price_per_million": 0.7999999999999999,
+    "metadata": {
+      "object": null,
+      "description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. It supports a 32k context window, enabling multi‑file refactoring or long diff review in a single call, and understands 30‑plus programming languages with special attention to TypeScript, Go and Terraform. Internal benchmarks show 5–8 pt gains over CodeLlama‑34 B‑Python on HumanEval and competitive BugFix scores thanks to a reinforcement pass that rewards compilable output. The model emits structured explanations alongside code blocks by default, making it suitable for educational tooling as well as production copilot scenarios. Cost‑wise, Together AI prices it well below proprietary incumbents, so teams can scale interactive coding without runaway spend. ",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.0000005",
+        "completion": "0.0000008",
+        "request": "0",
+        "image": "0",
+        "web_search": "0",
+        "internal_reasoning": "0"
+      },
+      "top_provider": {
+        "context_length": 32768,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      }
+    }
+  },
+  {
+    "id": "arcee-ai/maestro-reasoning",
+    "created_at": "2025-05-05T23:41:09+02:00",
+    "display_name": "Arcee AI: Maestro Reasoning",
+    "provider": "openrouter",
+    "context_window": 131072,
+    "max_tokens": null,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.8999999999999999,
+    "output_price_per_million": 3.3000000000000003,
+    "metadata": {
+      "object": null,
+      "description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B preview, the production 32 B release widens the context window to 128 k tokens and doubles pass‑rate on MATH and GSM‑8K, while also lifting code completion accuracy. Its instruction style encourages structured \"thought → answer\" traces that can be parsed or hidden according to user preference. That transparency pairs well with audit‑focused industries like finance or healthcare where seeing the reasoning path matters. In Arcee Conductor, Maestro is automatically selected for complex, multi‑constraint queries that smaller SLMs bounce. ",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.0000009",
+        "completion": "0.0000033",
+        "request": "0",
+        "image": "0",
+        "web_search": "0",
+        "internal_reasoning": "0"
+      },
+      "top_provider": {
+        "context_length": 131072,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      }
+    }
+  },
+  {
+    "id": "arcee-ai/spotlight",
+    "created_at": "2025-05-05T23:45:52+02:00",
+    "display_name": "Arcee AI: Spotlight",
+    "provider": "openrouter",
+    "context_window": 131072,
+    "max_tokens": null,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.18,
+    "output_price_per_million": 0.18,
+    "metadata": {
+      "object": null,
+      "description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal conversations that combine lengthy documents with one or more images. Training emphasized fast inference on consumer GPUs while retaining strong captioning, visual‐question‑answering, and diagram‑analysis accuracy. As a result, Spotlight slots neatly into agent workflows where screenshots, charts or UI mock‑ups need to be interpreted on the fly. Early benchmarks show it matching or out‑scoring larger VLMs such as LLaVA‑1.6 13 B on popular VQA and POPE alignment tests. ",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "image",
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.00000018",
+        "completion": "0.00000018",
+        "request": "0",
+        "image": "0",
+        "web_search": "0",
+        "internal_reasoning": "0"
+      },
+      "top_provider": {
+        "context_length": 131072,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      }
+    }
+  },
+  {
+    "id": "arcee-ai/virtuoso-large",
+    "created_at": "2025-05-05T23:01:25+02:00",
+    "display_name": "Arcee AI: Virtuoso Large",
+    "provider": "openrouter",
+    "context_window": 131072,
+    "max_tokens": null,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.75,
+    "output_price_per_million": 1.2,
+    "metadata": {
+      "object": null,
+      "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.00000075",
+        "completion": "0.0000012",
+        "request": "0",
+        "image": "0",
+        "web_search": "0",
+        "internal_reasoning": "0"
+      },
+      "top_provider": {
+        "context_length": 131072,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      }
+    }
+  },
+  {
+    "id": "arcee-ai/virtuoso-medium-v2",
+    "created_at": "2025-05-05T22:53:54+02:00",
+    "display_name": "Arcee AI: Virtuoso Medium V2",
+    "provider": "openrouter",
+    "context_window": 131072,
+    "max_tokens": null,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.5,
+    "output_price_per_million": 0.7999999999999999,
+    "metadata": {
+      "object": null,
+      "description": "Virtuoso‑Medium‑v2 is a 32 B model distilled from DeepSeek‑v3 logits and merged back onto a Qwen 2.5 backbone, yielding a sharper, more factual successor to the original Virtuoso Medium. The team harvested ~1.1 B logit tokens and applied \"fusion‑merging\" plus DPO alignment, which pushed scores past Arcee‑Nova 2024 and many 40 B‑plus peers on MMLU‑Pro, MATH and HumanEval. With a 128 k context and aggressive quantization options (from BF16 down to 4‑bit GGUF), it balances capability with deployability on single‑GPU nodes. Typical use cases include enterprise chat assistants, technical writing aids and medium‑complexity code drafting where Virtuoso‑Large would be overkill. ",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.0000005",
+        "completion": "0.0000008",
+        "request": "0",
+        "image": "0",
+        "web_search": "0",
+        "internal_reasoning": "0"
+      },
+      "top_provider": {
+        "context_length": 131072,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      }
+    }
+  },
   {
     "id": "arliai/qwq-32b-arliai-rpr-v1:free",
     "created_at": "2025-04-13T16:53:02+02:00",

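Each added entry in models.json carries the price twice: as a per-million-token float (`input_price_per_million`) and as a per-token string under `metadata.pricing.prompt`. A hedged sketch using only Ruby's stdlib JSON (not the RubyLLM API) against a trimmed copy of the caller-large entry from the diff, checking that the two representations agree and reading a capability flag:

```ruby
require "json"

# Trimmed copy of the arcee-ai/caller-large entry from the diff above.
entry = JSON.parse(<<~ENTRY)
  {
    "id": "arcee-ai/caller-large",
    "provider": "openrouter",
    "context_window": 32768,
    "supports_vision": false,
    "supports_functions": true,
    "input_price_per_million": 0.55,
    "metadata": {
      "pricing": { "prompt": "0.00000055", "completion": "0.00000085" }
    }
  }
ENTRY

# The metadata pricing strings are per-token; the top-level fields are
# per-million-token. They should agree to within float precision.
per_token    = entry["metadata"]["pricing"]["prompt"].to_f
per_million  = entry["input_price_per_million"]
prices_agree = (per_token * 1_000_000 - per_million).abs < 1e-9

# Capability flags are plain booleans, so filtering a model list on them
# (e.g. keeping only function-calling models) is a simple select.
function_calling = entry["supports_functions"]
```

The same pattern extends to the full file: parse the top-level array, then `select` on `supports_vision`, `context_window`, or price fields to shortlist models.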