diff --git a/src/api/providers/__tests__/anthropic-token-counting.test.ts b/src/api/providers/__tests__/anthropic-token-counting.test.ts
new file mode 100644
index 00000000000..aed95340a58
--- /dev/null
+++ b/src/api/providers/__tests__/anthropic-token-counting.test.ts
@@ -0,0 +1,257 @@
+// npx jest src/api/providers/__tests__/anthropic-token-counting.test.ts
+
+import { Anthropic } from "@anthropic-ai/sdk"
+import { AnthropicHandler } from "../anthropic"
+import { CLAUDE_MAX_SAFE_TOKEN_LIMIT } from "../constants"
+import { ApiHandlerOptions } from "../../../shared/api"
+
+// Mock the Anthropic client
+jest.mock("@anthropic-ai/sdk", () => {
+	const mockCountTokensResponse = {
+		input_tokens: 5000, // Default token count
+	}
+
+	const mockMessageResponse = {
+		id: "msg_123",
+		type: "message",
+		role: "assistant",
+		content: [{ type: "text", text: "This is a test response" }],
+		model: "claude-3-7-sonnet-20250219",
+		stop_reason: "end_turn",
+		usage: {
+			input_tokens: 5000,
+			output_tokens: 100,
+		},
+	}
+
+	// Mock stream implementation
+	const mockStream = {
+		[Symbol.asyncIterator]: async function* () {
+			yield {
+				type: "message_start",
+				message: {
+					id: "msg_123",
+					type: "message",
+					role: "assistant",
+					content: [],
+					model: "claude-3-7-sonnet-20250219",
+					stop_reason: null,
+					usage: {
+						input_tokens: 5000,
+						output_tokens: 0,
+					},
+				},
+			}
+			yield {
+				type: "content_block_start",
+				index: 0,
+				content_block: {
+					type: "text",
+					text: "This is a test response",
+				},
+			}
+			yield {
+				type: "message_delta",
+				usage: {
+					output_tokens: 100,
+				},
+			}
+			yield {
+				type: "message_stop",
+			}
+		},
+	}
+
+	return {
+		Anthropic: jest.fn().mockImplementation(() => {
+			return {
+				messages: {
+					create: jest.fn().mockImplementation((params) => {
+						if (params.stream) {
+							return mockStream
+						}
+						return mockMessageResponse
+					}),
+					countTokens: jest.fn().mockImplementation((params) => {
+						// If the messages array is very large, simulate a high token count
+						let tokenCount = mockCountTokensResponse.input_tokens
+
+						if (params.messages && params.messages.length > 10) {
+							tokenCount = CLAUDE_MAX_SAFE_TOKEN_LIMIT + 10000
+						}
+
+						return Promise.resolve({ input_tokens: tokenCount })
+					}),
+				},
+			}
+		}),
+	}
+})
+
+describe("AnthropicHandler Token Counting", () => {
+	// Test with Claude 3.7 Sonnet
+	describe("with Claude 3.7 Sonnet", () => {
+		const options: ApiHandlerOptions = {
+			apiKey: "test-key",
+			apiModelId: "claude-3-7-sonnet-20250219",
+		}
+
+		let handler: AnthropicHandler
+
+		beforeEach(() => {
+			handler = new AnthropicHandler(options)
+			jest.clearAllMocks()
+		})
+
+		it("should count tokens for content blocks", async () => {
+			const content = [{ type: "text" as const, text: "Hello, world!" }]
+			const count = await handler.countTokens(content)
+			expect(count).toBe(5000) // Mock returns 5000
+		})
+
+		it("should count tokens for a complete message", async () => {
+			const systemPrompt = "You are a helpful assistant."
+			const messages = [
+				{ role: "user" as const, content: "Hello!" },
+				{ role: "assistant" as const, content: "Hi there!" },
+				{ role: "user" as const, content: "How are you?" },
+			]
+
+			const count = await handler.countMessageTokens(systemPrompt, messages, "claude-3-7-sonnet-20250219")
+
+			expect(count).toBe(5000) // Mock returns 5000
+		})
+
+		it("should truncate conversation when token count exceeds limit", async () => {
+			// Create a large number of messages to trigger truncation
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = []
+
+			// Add 20 messages to exceed the token limit
+			for (let i = 0; i < 20; i++) {
+				messages.push({
+					role: i % 2 === 0 ? "user" : "assistant",
+					content: `Message ${i}: This is a test message that should have enough content to trigger the token limit when combined with other messages.`,
+				})
+			}
+
+			// Spy on console.warn and console.log to verify warnings are logged
+			const consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation()
+			const consoleLogSpy = jest.spyOn(console, "log").mockImplementation()
+
+			// Create a message stream
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Consume the stream to trigger the token counting and truncation
+			for await (const _ of stream) {
+				// Just consume the stream
+			}
+
+			// Verify that warnings were logged about the token limit
+			expect(consoleWarnSpy).toHaveBeenCalled()
+			expect(consoleLogSpy).toHaveBeenCalled()
+
+			// Restore the console spies
+			consoleWarnSpy.mockRestore()
+			consoleLogSpy.mockRestore()
+		})
+	})
+
+	// Test with Claude 3 Opus
+	describe("with Claude 3 Opus", () => {
+		const options: ApiHandlerOptions = {
+			apiKey: "test-key",
+			apiModelId: "claude-3-opus-20240229",
+		}
+
+		let handler: AnthropicHandler
+
+		beforeEach(() => {
+			handler = new AnthropicHandler(options)
+			jest.clearAllMocks()
+		})
+
+		it("should truncate conversation when token count exceeds limit", async () => {
+			// Create a large number of messages to trigger truncation
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = []
+
+			// Add 20 messages to exceed the token limit
+			for (let i = 0; i < 20; i++) {
+				messages.push({
+					role: i % 2 === 0 ? "user" : "assistant",
+					content: `Message ${i}: This is a test message that should have enough content to trigger the token limit when combined with other messages.`,
+				})
+			}
+
+			// Spy on console.warn and console.log to verify warnings are logged
+			const consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation()
+			const consoleLogSpy = jest.spyOn(console, "log").mockImplementation()
+
+			// Create a message stream
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Consume the stream to trigger the token counting and truncation
+			for await (const _ of stream) {
+				// Just consume the stream
+			}
+
+			// Verify that warnings were logged about the token limit
+			expect(consoleWarnSpy).toHaveBeenCalled()
+			expect(consoleLogSpy).toHaveBeenCalled()
+
+			// Restore the console spies
+			consoleWarnSpy.mockRestore()
+			consoleLogSpy.mockRestore()
+		})
+	})
+
+	// Test with Claude 3 Haiku
+	describe("with Claude 3 Haiku", () => {
+		const options: ApiHandlerOptions = {
+			apiKey: "test-key",
+			apiModelId: "claude-3-haiku-20240307",
+		}
+
+		let handler: AnthropicHandler
+
+		beforeEach(() => {
+			handler = new AnthropicHandler(options)
+			jest.clearAllMocks()
+		})
+
+		it("should truncate conversation when token count exceeds limit", async () => {
+			// Create a large number of messages to trigger truncation
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = []
+
+			// Add 20 messages to exceed the token limit
+			for (let i = 0; i < 20; i++) {
+				messages.push({
+					role: i % 2 === 0 ? "user" : "assistant",
+					content: `Message ${i}: This is a test message that should have enough content to trigger the token limit when combined with other messages.`,
+				})
+			}
+
+			// Spy on console.warn and console.log to verify warnings are logged
+			const consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation()
+			const consoleLogSpy = jest.spyOn(console, "log").mockImplementation()
+
+			// Create a message stream
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Consume the stream to trigger the token counting and truncation
+			for await (const _ of stream) {
+				// Just consume the stream
+			}
+
+			// Verify that warnings were logged about the token limit
+			expect(consoleWarnSpy).toHaveBeenCalled()
+			expect(consoleLogSpy).toHaveBeenCalled()
+
+			// Restore the console spies
+			consoleWarnSpy.mockRestore()
+			consoleLogSpy.mockRestore()
+		})
+	})
+})
diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index 5489b326093..72808f6ed31 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -10,8 +10,9 @@ import {
 } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
-import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./constants"
+import { ANTHROPIC_DEFAULT_MAX_TOKENS, CLAUDE_MAX_SAFE_TOKEN_LIMIT } from "./constants"
 import { SingleCompletionHandler, getModelParams } from "../index"
+import { truncateConversation } from "../../core/sliding-window"
 
 export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 	private options: ApiHandlerOptions
@@ -33,7 +34,61 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		let stream: AnthropicStream
 		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
-		let { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature, virtualId, info } = this.getModel()
+
+		// Check token count before sending the request for all Anthropic models
+		// Count tokens for the entire request
+		const tokenCount = await this.countMessageTokens(systemPrompt, messages, modelId)
+
+		// Get the context window size for the current model
+		const contextWindow = info.contextWindow || 200000
+
+		// Calculate a safe token limit (1k tokens below the context window)
+		const safeTokenLimit = Math.min(contextWindow - 1000, CLAUDE_MAX_SAFE_TOKEN_LIMIT)
+
+		// If the token count exceeds the safe limit, truncate the conversation
+		if (tokenCount > safeTokenLimit) {
+			console.warn(
+				`Token count (${tokenCount}) exceeds safe limit (${safeTokenLimit}) for model ${modelId}. Truncating conversation.`,
+			)
+
+			// Calculate how much we need to truncate
+			const excessTokens = tokenCount - safeTokenLimit
+			const totalTokens = tokenCount
+
+			// Determine truncation fraction based on excess tokens
+			// Start with 0.5 (50%) and increase if needed
+			let truncationFraction = 0.5
+
+			// If we're significantly over the limit, increase truncation
+			if (excessTokens > totalTokens * 0.3) {
+				truncationFraction = 0.7
+			}
+
+			// Truncate the conversation
+			const originalLength = messages.length
+			messages = truncateConversation(messages, truncationFraction)
+
+			console.log(
+				`Truncated conversation from ${originalLength} to ${messages.length} messages to fit within token limit.`,
+			)
+
+			// Verify token count after truncation
+			const newTokenCount = await this.countMessageTokens(systemPrompt, messages, modelId)
+
+			// If still over the limit, truncate again with a higher fraction
+			if (newTokenCount > safeTokenLimit) {
+				console.warn(
+					`After truncation, token count (${newTokenCount}) still exceeds safe limit. Truncating further.`,
+				)
+
+				messages = truncateConversation(messages, 0.8)
+
+				// Final verification
+				const finalTokenCount = await this.countMessageTokens(systemPrompt, messages, modelId)
+				console.log(`Final token count after truncation: ${finalTokenCount}`)
+			}
+		}
 
 		switch (modelId) {
 			case "claude-3-7-sonnet-20250219":
@@ -217,7 +272,32 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 	}
 
 	async completePrompt(prompt: string) {
-		let { id: model, temperature } = this.getModel()
+		let { id: model, temperature, info } = this.getModel()
+
+		// Check token count before sending the request for all Anthropic models
+		// Count tokens for the prompt
+		const tokenCount = await this.countTokens([{ type: "text", text: prompt }])
+
+		// Get the context window size for the current model
+		const contextWindow = info.contextWindow || 200000
+
+		// Calculate a safe token limit (1k tokens below the context window)
+		const safeTokenLimit = Math.min(contextWindow - 1000, CLAUDE_MAX_SAFE_TOKEN_LIMIT)
+
+		// If the token count exceeds the safe limit, truncate the prompt
+		if (tokenCount > safeTokenLimit) {
+			console.warn(
+				`Prompt token count (${tokenCount}) exceeds safe limit (${safeTokenLimit}) for model ${model}. Truncating prompt.`,
+			)
+
+			// Calculate how much we need to truncate
+			const ratio = safeTokenLimit / tokenCount
+			const newLength = Math.floor(prompt.length * ratio * 0.9) // 90% of the calculated length for safety
+
+			// Truncate the prompt
+			prompt = prompt.substring(0, newLength)
+			console.log(`Truncated prompt to ${newLength} characters to fit within token limit.`)
+		}
 
 		const message = await this.client.messages.create({
 			model,
@@ -257,4 +337,47 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			return super.countTokens(content)
 		}
 	}
+
+	/**
+	 * Counts tokens for a complete message request using Anthropic's API
+	 *
+	 * @param systemPrompt The system prompt
+	 * @param messages The conversation messages
+	 * @param model The model ID
+	 * @returns A promise resolving to the token count
+	 */
+	async countMessageTokens(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		model: string,
+	): Promise<number> {
+		try {
+			const response = await this.client.messages.countTokens({
+				model,
+				system: systemPrompt,
+				messages: messages,
+			})
+
+			return response.input_tokens
+		} catch (error) {
+			// Log the error but fall back to estimating tokens by counting each part separately
+			console.warn("Anthropic message token counting failed, using fallback", error)
+
+			// Fallback: Count system prompt tokens
+			const systemTokens = await this.countTokens([{ type: "text", text: systemPrompt }])
+
+			// Count tokens for each message
+			let messageTokens = 0
+			for (const message of messages) {
+				if (typeof message.content === "string") {
+					messageTokens += await this.countTokens([{ type: "text", text: message.content }])
+				} else {
+					messageTokens += await this.countTokens(message.content)
+				}
+			}
+
+			// Add some overhead for message formatting
+			return systemTokens + messageTokens + messages.length * 5
+		}
+	}
 }
diff --git a/src/api/providers/constants.ts b/src/api/providers/constants.ts
index 4d6c4672e50..d1a00455df1 100644
--- a/src/api/providers/constants.ts
+++ b/src/api/providers/constants.ts
@@ -5,4 +5,7 @@ export const DEFAULT_HEADERS = {
 
 export const ANTHROPIC_DEFAULT_MAX_TOKENS = 8192
 
+// Maximum safe token limit for Claude 3.7 Sonnet (200k - 1k safety buffer)
+export const CLAUDE_MAX_SAFE_TOKEN_LIMIT = 199000
+
 export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6
diff --git a/src/core/__tests__/Cline.test.ts b/src/core/__tests__/Cline.test.ts
index 14540c834c3..93b9d4d301e 100644
--- a/src/core/__tests__/Cline.test.ts
+++ b/src/core/__tests__/Cline.test.ts
@@ -321,6 +321,7 @@ describe("Cline", () => {
 	describe("getEnvironmentDetails", () => {
 		describe("API conversation handling", () => {
+			// Set timeout to 15 seconds for this specific test
 			it("should clean conversation history before sending to API", async () => {
 				// Cline.create will now use our mocked getEnvironmentDetails
 				const [cline, task] = Cline.create({
@@ -387,7 +388,7 @@
 
 				// Verify extra properties were removed
 				expect(Object.keys(cleanedMessage!)).toEqual(["role", "content"])
-			})
+			}, 15000)
 
 			it("should handle image blocks based on model capabilities", async () => {
 				// Create two configurations - one with image support, one without
diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts
index 75395ecd758..2f0d5731494 100644
--- a/src/core/sliding-window/index.ts
+++ b/src/core/sliding-window/index.ts
@@ -6,6 +6,12 @@ import { ApiHandler } from "../../api"
  */
 export const TOKEN_BUFFER_PERCENTAGE = 0.1
 
+/**
+ * Maximum safe token limit for Claude 3.7 Sonnet (200k - 1k safety buffer)
+ * This mirrors the value in constants.ts; it is redefined here to avoid a circular dependency
+ */
+export const CLAUDE_MAX_SAFE_TOKEN_LIMIT = 199000
+
 /**
  * Counts tokens for user content using the provider's token counting implementation.
  *
@@ -91,6 +97,39 @@ export async function truncateConversationIfNeeded({
 	// Calculate total effective tokens (totalTokens never includes the last message)
 	const effectiveTokens = totalTokens + lastMessageTokens
 
+	// Special handling for Anthropic models to ensure we stay under the context window limit
+	const { id: modelId, info } = apiHandler.getModel()
+
+	// Check if this is an Anthropic model
+	if (modelId.startsWith("claude-")) {
+		// Get the context window size for the current model
+		const modelContextWindow = info.contextWindow || 200000
+
+		// Calculate a safe token limit (1k tokens below the context window)
+		const safeTokenLimit = Math.min(modelContextWindow - 1000, CLAUDE_MAX_SAFE_TOKEN_LIMIT)
+
+		if (effectiveTokens > safeTokenLimit) {
+			console.warn(
+				`Token count (${effectiveTokens}) exceeds safe limit (${safeTokenLimit}) for model ${modelId}. Using aggressive truncation.`,
+			)
+
+			// Calculate how much we need to truncate
+			const excessTokens = effectiveTokens - safeTokenLimit
+
+			// Determine truncation fraction based on excess tokens
+			// Start with 0.5 (50%) and increase if needed
+			let truncationFraction = 0.5
+
+			// If we're significantly over the limit, increase truncation
+			if (excessTokens > effectiveTokens * 0.3) {
+				truncationFraction = 0.7
+			}
+
+			return truncateConversation(messages, truncationFraction)
+		}
+	}
+
+	// Standard truncation logic for other models
 	// Calculate available tokens for conversation history
 	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
 	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
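
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch above): a standalone TypeScript
// version of the truncation-fraction selection that createMessage and
// truncateConversationIfNeeded apply once a request exceeds the safe token
// limit. The helper name `pickTruncationFraction` is hypothetical.
// ---------------------------------------------------------------------------
function pickTruncationFraction(tokenCount: number, safeTokenLimit: number): number {
	// Start by removing half of the removable messages.
	let truncationFraction = 0.5

	// If the excess is more than 30% of the total token count, remove 70% instead.
	const excessTokens = tokenCount - safeTokenLimit
	if (excessTokens > tokenCount * 0.3) {
		truncationFraction = 0.7
	}

	return truncationFraction
}

// Example, assuming the 199000-token safe limit defined in constants.ts:
// pickTruncationFraction(250_000, 199_000) // => 0.5 (excess is ~20% of the total)
// pickTruncationFraction(300_000, 199_000) // => 0.7 (excess is ~34% of the total)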