Commit 1bf9cee

feat: Add cost tracking for API usage (#324)
Adds comprehensive cost tracking functionality to the CLI. Includes:

- Real-time cost calculation based on token usage
- Default pricing for 30+ models across all major providers
- New /cost command to view session breakdowns
- Status bar integration showing current session cost
- Configurable pricing with support for custom models

This helps users monitor and manage their API spending directly within the CLI.

## Key Changes

### 1. **New Pricing Configuration** (`config/pricing.go`)

- Default pricing for 30+ models across Anthropic, OpenAI, Google, DeepSeek, Groq, Mistral, etc.
- Configurable via `config.yaml` with support for custom models
- Regular updates to match current provider rates

### 2. **Cost Tracking Service** (`internal/services/pricing_service.go`)

- Real-time cost calculation based on token usage (input/output)
- Session-level and conversation-level cost aggregation
- Integration with existing conversation and state management

### 3. **New `/cost` Command**

- Displays detailed cost breakdown by model and session
- Shows total cost and per-model usage statistics
- Accessible via chat interface

### 4. **Status Bar Integration**

- Real-time cost display in the input status bar
- Shows current session cost as users interact with models
- Configurable display options

### 5. **Database Schema Updates**

- Extended storage to persist cost statistics
- Backward-compatible migration
- Historical cost tracking for conversations

### 6. **Configuration Options**

- Enable/disable cost tracking via `cost_tracking.enabled`
- Custom pricing configuration for specific models
- Default pricing that's regularly updated

## Technical Details

- **Files Changed**: 34 files, 1099 insertions(+), 125 deletions(-)
- **New Files**:
  - `config/pricing.go` - Default pricing configuration
  - `internal/domain/pricing.go` - Pricing domain models
  - `internal/services/pricing_service.go` - Core cost calculation service
- **Integration**: Works with all existing providers and models
- **Backward Compatibility**: Existing conversations show $0.00 cost until new usage is recorded
- **Testing**: Includes comprehensive unit tests for cost calculation

## Usage

1. **Enable/Disable**: Configure via `cost_tracking.enabled` in config
2. **View Costs**: Use the `/cost` command in the chat interface
3. **Custom Pricing**: Add custom model pricing in config
4. **Real-time Display**: See costs in the status bar during conversations

This feature provides valuable visibility into API spending, helping users manage costs effectively while using the CLI.

Closes #80

---------

Signed-off-by: Eden Reich <eden.reich@gmail.com>
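The cost-calculation path described above can be sketched as follows. The type and function names here are illustrative, not the actual identifiers from `internal/services/pricing_service.go`; only the formula and the per-million-token pricing shape come from this commit:

```go
package main

import "fmt"

// ModelPrice mirrors the per-model pricing shape from this change:
// prices are expressed per million tokens, input and output separately.
type ModelPrice struct {
	InputPricePerMTok  float64
	OutputPricePerMTok float64
}

// CalculateCost applies the formula used throughout this feature:
// (tokens / 1,000,000) × price_per_million_tokens, summed over input and output.
func CalculateCost(p ModelPrice, inputTokens, outputTokens int64) float64 {
	return float64(inputTokens)/1_000_000*p.InputPricePerMTok +
		float64(outputTokens)/1_000_000*p.OutputPricePerMTok
}

func main() {
	// Example rates matching the README override for "openai/gpt-4o".
	gpt4o := ModelPrice{InputPricePerMTok: 2.50, OutputPricePerMTok: 10.00}
	cost := CalculateCost(gpt4o, 12_000, 3_500)
	fmt.Printf("$%.4f\n", cost) // $0.0650
}
```

A model with no pricing data simply uses the zero value of `ModelPrice`, which yields the $0.00 behavior the commit describes for unpriced models.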
1 parent fd9b053 commit 1bf9cee

37 files changed: +1300, -154 lines

.infer/config.yaml

Lines changed: 9 additions & 0 deletions
@@ -12,6 +12,10 @@ gateway:
     - ollama_cloud/kimi-k2:1t
     - ollama_cloud/kimi-k2-thinking
     - ollama_cloud/deepseek-v3.1:671b
+    - groq/whisper-large-v3
+    - groq/whisper-large-v3-turbo
+    - groq/playai-tts
+    - groq/playai-tts-arabic
   vision_enabled: true
   client:
     timeout: 200
@@ -529,6 +533,7 @@ chat:
       mcp: true
       context_usage: true
       session_tokens: true
+      cost: true
       git_branch: true
 a2a:
   enabled: true
@@ -565,6 +570,10 @@ mcp:
   liveness_probe_enabled: true
   liveness_probe_interval: 10
   servers: []
+pricing:
+  enabled: true
+  currency: USD
+  custom_prices: {}
 init:
   prompt: |-
     Please analyze this project and generate a comprehensive AGENTS.md file. Start by using the Tree tool to understand the project structure.

README.md

Lines changed: 87 additions & 0 deletions
@@ -30,6 +30,7 @@ and management of inference services.
 - [Commands](#commands)
 - [Tools for LLMs](#tools-for-llms)
 - [Configuration](#configuration)
+- [Cost Tracking](#cost-tracking)
 - [Tool Approval System](#tool-approval-system)
 - [Shortcuts](#shortcuts)
 - [Global Flags](#global-flags)
@@ -56,6 +57,7 @@ and management of inference services.
 - **Auto-Accept Mode**: All tools auto-approved for rapid execution (YOLO mode)
 - Toggle between modes with **Shift+Tab**
 - **Token Usage Tracking**: Accurate token counting with polyfill support for providers that don't return usage metrics
+- **Cost Tracking**: Real-time cost calculation for API usage with per-model breakdown and configurable pricing
 - **Inline History Auto-Completion**: Smart command history suggestions with inline completion
 - **Customizable Keybindings**: Fully configurable keyboard shortcuts for the chat interface
 - **Extensible Shortcuts System**: Create custom commands with AI-powered snippets - [Learn more →](docs/shortcuts-guide.md)
@@ -379,6 +381,90 @@ Example: `agent.model` → `INFER_AGENT_MODEL`
 
 For complete configuration documentation, including all options and environment variables, see [Configuration Reference](docs/configuration-reference.md).
 
+## Cost Tracking
+
+The CLI automatically tracks API costs based on token usage for all providers and models.
+Costs are calculated in real-time with support for both aggregate totals and per-model breakdowns.
+
+### Viewing Costs
+
+Use the `/cost` command in any chat session to see the cost breakdown:
+
+```bash
+# In chat, use the /cost shortcut
+/cost
+```
+
+This displays:
+
+- **Total session cost** in USD
+- **Input/output costs** separately
+- **Per-model breakdown** when using multiple models
+- **Token usage** for each model
+
+**Status Bar**: Session costs are also displayed in the status bar (e.g., `💰 $0.0234`) if enabled.
+
+### Configuring Pricing
+
+The CLI includes hardcoded pricing for 30+ models across all major providers
+(Anthropic, OpenAI, Google, DeepSeek, Groq, Mistral, Cohere, etc.).
+Prices are updated regularly to match current provider pricing.
+
+**Override pricing** for specific models or add pricing for custom models:
+
+```yaml
+# .infer/config.yaml
+pricing:
+  enabled: true
+  currency: "USD"
+  custom_prices:
+    # Override existing model pricing
+    "openai/gpt-4o":
+      input_price_per_mtoken: 2.50 # Price per million input tokens
+      output_price_per_mtoken: 10.00 # Price per million output tokens
+
+    # Add pricing for custom/local models
+    "ollama/llama3.2":
+      input_price_per_mtoken: 0.0
+      output_price_per_mtoken: 0.0
+
+    "custom-fine-tuned-model":
+      input_price_per_mtoken: 5.00
+      output_price_per_mtoken: 15.00
+```
+
+**Via environment variables:**
+
+```bash
+# Disable cost tracking entirely
+export INFER_PRICING_ENABLED=false
+
+# Override specific model pricing (use underscores in model names)
+export INFER_PRICING_CUSTOM_PRICES_OPENAI_GPT_4O_INPUT_PRICE_PER_MTOKEN=3.00
+export INFER_PRICING_CUSTOM_PRICES_OPENAI_GPT_4O_OUTPUT_PRICE_PER_MTOKEN=12.00

+
+# Hide cost from status bar
+export INFER_CHAT_STATUS_BAR_INDICATORS_COST=false
+```
+
+**Status Bar Configuration:**
+
+```yaml
+# .infer/config.yaml
+chat:
+  status_bar:
+    enabled: true
+    indicators:
+      cost: true # Show/hide cost indicator
+```
+
+### Cost Calculation
+
+- Costs are calculated as: `(tokens / 1,000,000) × price_per_million_tokens`
+- Prices are per million tokens (input and output priced separately)
+- Models without pricing data (Ollama, free tiers) show $0.00
+- Token counts use actual usage from providers or polyfilled estimates
+
 ## Tool Approval System
 
 The CLI includes a comprehensive approval system for sensitive tool operations, providing security and
@@ -449,6 +535,7 @@ The CLI provides an extensible shortcuts system for quickly executing common com
 - `/help [shortcut]` - Show available shortcuts
 - `/switch [model]` - Switch to different model
 - `/theme [name]` - Switch chat theme
+- `/cost` - Show session cost breakdown with per-model details
 - `/compact` - Compact conversation
 - `/export [format]` - Export conversation
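The per-model breakdown and status-bar total described in the README changes above can be illustrated with a small aggregation sketch. The names here are hypothetical, not the CLI's actual internals; the rates and the zero-cost behavior for local models come from the documentation in this commit:

```go
package main

import "fmt"

// usage is a per-model token tally for one session.
type usage struct {
	model                 string
	inputToks, outputToks int64
	inRate, outRate       float64 // USD per million tokens
}

// sessionCost sums per-model costs into the session total shown
// in the status bar (e.g. 💰 $0.0234).
func sessionCost(entries []usage) float64 {
	total := 0.0
	for _, u := range entries {
		total += float64(u.inputToks)/1e6*u.inRate + float64(u.outputToks)/1e6*u.outRate
	}
	return total
}

func main() {
	session := []usage{
		{"openai/gpt-4o", 8_000, 1_000, 2.50, 10.00},
		{"ollama/llama3.2", 50_000, 20_000, 0, 0}, // local models cost $0.00
	}
	fmt.Printf("💰 $%.4f\n", sessionCost(session)) // 💰 $0.0300
}
```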
cmd/chat.go

Lines changed: 2 additions & 0 deletions
@@ -74,6 +74,7 @@ func StartChatSession(cfg *config.Config, v *viper.Viper) error {
 	toolService := services.GetToolService()
 	fileService := services.GetFileService()
 	imageService := services.GetImageService()
+	pricingService := services.GetPricingService()
 	shortcutRegistry := services.GetShortcutRegistry()
 	stateManager := services.GetStateManager()
 	messageQueue := services.GetMessageQueue()
@@ -97,6 +98,7 @@ func StartChatSession(cfg *config.Config, v *viper.Viper) error {
 		toolService,
 		fileService,
 		imageService,
+		pricingService,
 		shortcutRegistry,
 		stateManager,
 		messageQueue,

cmd/export.go

Lines changed: 2 additions & 1 deletion
@@ -48,7 +48,8 @@ func runExport(sessionID string) error {
 
 	toolRegistry := tools.NewRegistry(cfg, nil, nil, nil)
 	toolFormatterService := services.NewToolFormatterService(toolRegistry)
-	persistentRepo := services.NewPersistentConversationRepository(toolFormatterService, storageBackend)
+	pricingService := services.NewPricingService(&cfg.Pricing)
+	persistentRepo := services.NewPersistentConversationRepository(toolFormatterService, pricingService, storageBackend)
 
 	ctx := context.Background()
 	if err := persistentRepo.LoadConversation(ctx, sessionID); err != nil {

config/config.go

Lines changed: 9 additions & 1 deletion
@@ -37,6 +37,7 @@ type Config struct {
 	Chat    ChatConfig    `yaml:"chat" mapstructure:"chat"`
 	A2A     A2AConfig     `yaml:"a2a" mapstructure:"a2a"`
 	MCP     MCPConfig     `yaml:"mcp" mapstructure:"mcp"`
+	Pricing PricingConfig `yaml:"pricing" mapstructure:"pricing"`
 	Init    InitConfig    `yaml:"init" mapstructure:"init"`
 	Compact CompactConfig `yaml:"compact" mapstructure:"compact"`
 }
@@ -359,6 +360,7 @@ type StatusBarIndicators struct {
 	MCP           bool `yaml:"mcp" mapstructure:"mcp"`
 	ContextUsage  bool `yaml:"context_usage" mapstructure:"context_usage"`
 	SessionTokens bool `yaml:"session_tokens" mapstructure:"session_tokens"`
+	Cost          bool `yaml:"cost" mapstructure:"cost"`
 	GitBranch     bool `yaml:"git_branch" mapstructure:"git_branch"`
 }
 
@@ -486,6 +488,7 @@ func GetDefaultStatusBarConfig() StatusBarConfig {
 			MCP:           true,
 			ContextUsage:  true,
 			SessionTokens: true,
+			Cost:          true,
 			GitBranch:     true,
 		},
 	}
@@ -510,6 +513,10 @@ func DefaultConfig() *Config { //nolint:funlen
 			"ollama_cloud/kimi-k2:1t",
 			"ollama_cloud/kimi-k2-thinking",
 			"ollama_cloud/deepseek-v3.1:671b",
+			"groq/whisper-large-v3",
+			"groq/whisper-large-v3-turbo",
+			"groq/playai-tts",
+			"groq/playai-tts-arabic",
 		},
 		VisionEnabled: true,
 	},
@@ -851,7 +858,8 @@ Respond with ONLY the title, no quotes or explanation.`,
 			},
 		},
 	},
-	MCP: *DefaultMCPConfig(),
+	MCP:     *DefaultMCPConfig(),
+	Pricing: GetDefaultPricingConfig(),
 	Init: InitConfig{
 		Prompt: `Please analyze this project and generate a comprehensive AGENTS.md file. Start by using the Tree tool to understand the project structure.
 Use your available tools to examine configuration files, documentation, build systems, and development workflow.
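The config.go diff references `PricingConfig` and `GetDefaultPricingConfig` from `config/pricing.go` without showing them. A plausible sketch, with field names assumed but kept consistent with the YAML keys added in this commit (`enabled`, `currency`, `custom_prices`, `input_price_per_mtoken`, `output_price_per_mtoken`), might look like:

```go
package main

import "fmt"

// ModelPricing holds per-million-token rates, matching the
// input_price_per_mtoken / output_price_per_mtoken YAML keys.
type ModelPricing struct {
	InputPricePerMToken  float64 `yaml:"input_price_per_mtoken" mapstructure:"input_price_per_mtoken"`
	OutputPricePerMToken float64 `yaml:"output_price_per_mtoken" mapstructure:"output_price_per_mtoken"`
}

// PricingConfig mirrors the pricing: block added to .infer/config.yaml.
type PricingConfig struct {
	Enabled      bool                    `yaml:"enabled" mapstructure:"enabled"`
	Currency     string                  `yaml:"currency" mapstructure:"currency"`
	CustomPrices map[string]ModelPricing `yaml:"custom_prices" mapstructure:"custom_prices"`
}

// GetDefaultPricingConfig returns the defaults visible in the config.yaml diff:
// tracking enabled, USD currency, and no custom price overrides.
func GetDefaultPricingConfig() PricingConfig {
	return PricingConfig{
		Enabled:      true,
		Currency:     "USD",
		CustomPrices: map[string]ModelPricing{},
	}
}

func main() {
	cfg := GetDefaultPricingConfig()
	fmt.Println(cfg.Enabled, cfg.Currency, len(cfg.CustomPrices)) // true USD 0
}
```

The `mapstructure` tags would let Viper bind the `INFER_PRICING_*` environment variables shown in the README section to the same fields.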
