import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"

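/**
 * API handler for the Glama AI gateway, which exposes an OpenAI-compatible
 * endpoint in front of multiple upstream providers (including Anthropic).
 */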
export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

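	/**
	 * Streams a completion from the Glama gateway: converts the Anthropic-style
	 * message history to OpenAI format, yields text chunks as they arrive, then
	 * yields a usage/cost summary looked up from the gateway after the stream ends.
	 */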
	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// This is specifically for Claude models (some models may 'support prompt caching' automatically without this).
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages.
			// (Note: this works because we only ever add one user message at a time;
			// if we added multiple, we'd need to mark the user message before the last assistant message.)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since env details will always be added at the end,
					// but if they weren't there and the user added an image_url-type message,
					// this would pop a text part from before it and then move it to the end.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()

					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// max_tokens is required by Anthropic models.
		// Other providers default to the maximum tokens allowed.
		let maxTokens: number | undefined

		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

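		// withResponse() exposes the raw HTTP response alongside the stream so we
		// can read the completion request id header, which is used further down to
		// look up token usage and cost once streaming has finished.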
		const { data: completion, response } = await this.client.chat.completions
			.create({
				model: this.getModel().id,
				max_tokens: maxTokens,
				temperature: 0,
				messages: openAiMessages,
				stream: true,
			})
			.withResponse()

		const completionRequestId = response.headers.get("x-completion-request-id")

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta

			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

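		// Once the stream is drained, ask the gateway for the completion request
		// details so we can report token usage and total cost for this message.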
		try {
			const response = await axios.get(
				`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestId}`,
				{
					headers: {
						Authorization: `Bearer ${this.options.glamaApiKey}`,
					},
				},
			)

			const completionRequest = response.data

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: completionRequest.totalCostUsd,
				}
			}
		} catch (error) {
			// Usage reporting is best-effort; log and ignore failures so they don't break the stream.
			console.error("Error fetching Glama generation details:", error)
		}
	}

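	/**
	 * Returns the configured model id and metadata, falling back to the Glama
	 * defaults when no model has been selected in the options.
	 */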
	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo

		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}

		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
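
// Illustrative usage sketch (not part of the file above): assuming an
// ApiHandlerOptions value that carries glamaApiKey and a model id in the
// "anthropic/..." form referenced above, a caller inside an async context
// could drain the ApiStream like this, printing text chunks followed by the
// final usage summary:
//
//   const handler = new GlamaHandler({
//   	glamaApiKey: process.env.GLAMA_API_KEY,
//   	glamaModelId: "anthropic/claude-3-5-sonnet", // hypothetical model id
//   } as ApiHandlerOptions)
//
//   for await (const chunk of handler.createMessage("You are a helpful assistant.", [
//   	{ role: "user", content: "Hello!" },
//   ])) {
//   	if (chunk.type === "text") {
//   		process.stdout.write(chunk.text)
//   	} else if (chunk.type === "usage") {
//   		console.log("\ninput:", chunk.inputTokens, "output:", chunk.outputTokens, "cost:", chunk.totalCost)
//   	}
//   }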