Commit e0e5416
refactor: align HuggingFace provider with established pattern
- Move huggingface-models.ts from src/services/ to src/api/providers/fetchers/huggingface.ts
- Update fetcher to return ModelInfo records instead of raw HuggingFace models
- Add HuggingFace to RouterName type and integrate with modelCache.ts
- Update HuggingFace provider to extend RouterProvider base class
- Remove unnecessary src/api/huggingface-models.ts wrapper
- Update webviewMessageHandler to use the new pattern with getModels()
- Maintain backward compatibility with webview by transforming ModelInfo to expected format
1 parent d62a260 commit e0e5416

File tree

7 files changed: +239 −77 lines

huggingface-refactor-plan.md

Lines changed: 83 additions & 0 deletions
# HuggingFace Provider Refactoring Plan

## Overview

The HuggingFace provider implementation needs to be refactored to match the established pattern used by other providers that fetch models via network calls (e.g., OpenRouter, Glama, or Ollama).

## Current Implementation Issues

1. **File locations are incorrect:**

   - `src/services/huggingface-models.ts` - should live in `src/api/providers/fetchers/`
   - `src/api/huggingface-models.ts` - an unnecessary wrapper that should be removed

2. **Pattern mismatch:**

   - The current implementation returns raw HuggingFace model data
   - It should return `ModelInfo` records like other providers
   - It is not integrated with the `modelCache.ts` system
   - The provider doesn't use the `RouterProvider` base class or the `fetchModel` pattern

## Established Pattern (from other providers)

### 1. Fetcher Pattern (`src/api/providers/fetchers/`)

- Fetcher files export a function like `getHuggingFaceModels()` that returns `Record<string, ModelInfo>` (a sketch follows this list)
- Fetchers handle API calls and transform raw data to `ModelInfo` format
- Examples: `getOpenRouterModels()`, `getGlamaModels()`, `getOllamaModels()`
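As a rough sketch of that shape (the endpoint URL, the `RawModel` interface, and the default field values below are illustrative assumptions, not the actual fetcher code):

```typescript
import axios from "axios"
import { ModelInfo } from "@roo-code/types"

// Hypothetical raw shape returned by a provider's model-list endpoint.
interface RawModel {
	id: string
	context_length?: number
}

export async function getExampleModels(): Promise<Record<string, ModelInfo>> {
	const models: Record<string, ModelInfo> = {}

	// Fetch the raw list, then map each entry into a ModelInfo record
	// keyed by model id, which is the shape modelCache.ts expects.
	const response = await axios.get<RawModel[]>("https://example.com/api/models")

	for (const raw of response.data) {
		models[raw.id] = {
			maxTokens: 8192, // conservative output cap
			contextWindow: raw.context_length ?? 32768,
			supportsImages: false,
			supportsPromptCache: false,
		}
	}

	return models
}
```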
### 2. Provider Pattern (`src/api/providers/`)

- Providers either:
  - extend `RouterProvider` and use `fetchModel()` (e.g., Glama), or
  - implement their own `fetchModel()` pattern (e.g., OpenRouter)
- Providers use `getModels()` from `modelCache.ts` to fetch and cache models

### 3. Model Cache Integration

- The `RouterName` type includes all providers that use the cache
- `modelCache.ts` has a switch statement that calls the appropriate fetcher
- The cache provides memory and file caching for model lists
## Implementation Steps

### Step 1: Create new fetcher

- Move `src/services/huggingface-models.ts` to `src/api/providers/fetchers/huggingface.ts`
- Transform the fetcher to return `Record<string, ModelInfo>` instead of raw HuggingFace models
- Parse HuggingFace model data to extract:
  - `maxTokens`
  - `contextWindow`
  - `supportsImages` (based on `pipeline_tag`)
  - `description`
  - Other relevant `ModelInfo` fields

### Step 2: Update RouterName and modelCache

- Add `"huggingface"` to the `RouterName` type in `src/shared/api.ts`
- Add a HuggingFace case to the switch statement in `modelCache.ts`
- Update the `GetModelsOptions` type to include HuggingFace (sketched below)
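The `src/shared/api.ts` diff is not included in this view, so the sketch below is an assumption about its shape: only the `"huggingface"` additions are what Step 2 prescribes, while the other union members and option fields are illustrative.

```typescript
// src/shared/api.ts (sketch; other members are illustrative)
export type RouterName = "openrouter" | "glama" | "ollama" | "lmstudio" | "litellm" | "huggingface"

// HuggingFace needs no extra options such as baseUrl, since its fetcher
// takes no arguments, so a bare provider tag is assumed to suffice.
export type GetModelsOptions =
	| { provider: "openrouter" }
	| { provider: "glama" }
	| { provider: "ollama"; baseUrl?: string }
	| { provider: "lmstudio"; baseUrl?: string }
	| { provider: "litellm"; baseUrl?: string }
	| { provider: "huggingface" }
```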
### Step 3: Update HuggingFace provider

- Either extend `RouterProvider` or implement the `fetchModel()` pattern
- Use `getModels()` from `modelCache` to fetch models
- Remove the hardcoded model info from `getModel()`

### Step 4: Update webview integration

- Modify `webviewMessageHandler.ts` to use the new pattern
- Instead of importing from `src/api/huggingface-models.ts`, use `getModels()` with provider `"huggingface"`
- Transform the response to match the expected format for the webview (see the sketch after this list)
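The `webviewMessageHandler.ts` diff is not shown in this section, so the following is only a sketch of the intended wiring; the helper name, message `type` string, and transformed fields are hypothetical:

```typescript
// webviewMessageHandler.ts (sketch)
import { getModels } from "../api/providers/fetchers/modelCache"

async function handleRequestHuggingFaceModels(postMessage: (msg: unknown) => void) {
	// Fetch ModelInfo records through the shared cache.
	const models = await getModels({ provider: "huggingface" })

	// Transform back to the array shape the webview previously received,
	// preserving backward compatibility.
	const huggingFaceModels = Object.entries(models).map(([id, info]) => ({
		id,
		contextWindow: info.contextWindow,
		supportsImages: info.supportsImages ?? false,
		description: info.description,
	}))

	postMessage({ type: "huggingFaceModels", huggingFaceModels })
}
```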
### Step 5: Cleanup

- Remove `src/api/huggingface-models.ts`
- Remove the old `src/services/huggingface-models.ts`
- Update any other imports

## Benefits of this refactoring

1. **Consistency**: HuggingFace will follow the same pattern as other providers
2. **Caching**: Model lists will be cached in memory and on disk
3. **Maintainability**: Easier to understand and modify when all providers follow the same pattern
4. **Type safety**: Better integration with TypeScript types

src/api/huggingface-models.ts

Lines changed: 0 additions & 17 deletions
This file was deleted.
src/services/huggingface-models.ts → src/api/providers/fetchers/huggingface.ts

Lines changed: 60 additions & 24 deletions
@@ -1,3 +1,7 @@
+import axios from "axios"
+import { ModelInfo } from "@roo-code/types"
+import { z } from "zod"
+
 export interface HuggingFaceModel {
 	_id: string
 	id: string
@@ -52,9 +56,8 @@ const BASE_URL = "https://huggingface.co/api/models"
 const CACHE_DURATION = 1000 * 60 * 60 // 1 hour

 interface CacheEntry {
-	data: HuggingFaceModel[]
+	data: Record<string, ModelInfo>
 	timestamp: number
-	status: "success" | "partial" | "error"
 }

 let cache: CacheEntry | null = null
@@ -95,7 +98,46 @@ const requestInit: RequestInit = {
 	mode: "cors",
 }

-export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
+/**
+ * Parse a HuggingFace model into ModelInfo format
+ */
+function parseHuggingFaceModel(model: HuggingFaceModel): ModelInfo {
+	// Extract context window from tokenizer config if available
+	const contextWindow = model.config.tokenizer_config?.model_max_length || 32768 // Default to 32k
+
+	// Determine if model supports images based on pipeline tag
+	const supportsImages = model.pipeline_tag === "image-text-to-text"
+
+	// Create a description from available metadata
+	const description = [
+		model.config.model_type ? `Type: ${model.config.model_type}` : null,
+		model.config.architectures?.length ? `Architecture: ${model.config.architectures[0]}` : null,
+		model.library_name ? `Library: ${model.library_name}` : null,
+		model.inferenceProviderMapping?.length
+			? `Providers: ${model.inferenceProviderMapping.map((p) => p.provider).join(", ")}`
+			: null,
+	]
+		.filter(Boolean)
+		.join(", ")
+
+	const modelInfo: ModelInfo = {
+		maxTokens: Math.min(contextWindow, 8192), // Conservative default, most models support at least 8k output
+		contextWindow,
+		supportsImages,
+		supportsPromptCache: false, // HuggingFace inference API doesn't support prompt caching
+		description,
+		// HuggingFace models through their inference API are generally free
+		inputPrice: 0,
+		outputPrice: 0,
+	}
+
+	return modelInfo
+}
+
+/**
+ * Fetch HuggingFace models and return them in ModelInfo format
+ */
+export async function getHuggingFaceModels(): Promise<Record<string, ModelInfo>> {
 	const now = Date.now()

 	// Check cache
@@ -104,6 +146,8 @@ export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
 		return cache.data
 	}

+	const models: Record<string, ModelInfo> = {}
+
 	try {
 		console.log("Fetching Hugging Face models from API...")

@@ -115,57 +159,49 @@ export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {

 		let textGenModels: HuggingFaceModel[] = []
 		let imgTextModels: HuggingFaceModel[] = []
-		let hasErrors = false

 		// Process text-generation models
 		if (textGenResponse.status === "fulfilled" && textGenResponse.value.ok) {
 			textGenModels = await textGenResponse.value.json()
 		} else {
 			console.error("Failed to fetch text-generation models:", textGenResponse)
-			hasErrors = true
 		}

 		// Process image-text-to-text models
 		if (imgTextResponse.status === "fulfilled" && imgTextResponse.value.ok) {
 			imgTextModels = await imgTextResponse.value.json()
 		} else {
 			console.error("Failed to fetch image-text-to-text models:", imgTextResponse)
-			hasErrors = true
 		}

 		// Combine and filter models
-		const allModels = [...textGenModels, ...imgTextModels]
-			.filter((model) => model.inferenceProviderMapping.length > 0)
-			.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()))
+		const allModels = [...textGenModels, ...imgTextModels].filter(
+			(model) => model.inferenceProviderMapping.length > 0,
+		)
+
+		// Convert to ModelInfo format
+		for (const model of allModels) {
+			models[model.id] = parseHuggingFaceModel(model)
+		}

 		// Update cache
 		cache = {
-			data: allModels,
+			data: models,
 			timestamp: now,
-			status: hasErrors ? "partial" : "success",
 		}

-		console.log(`Fetched ${allModels.length} Hugging Face models (status: ${cache.status})`)
-		return allModels
+		console.log(`Fetched ${Object.keys(models).length} Hugging Face models`)
+		return models
 	} catch (error) {
 		console.error("Error fetching Hugging Face models:", error)

 		// Return cached data if available
 		if (cache) {
 			console.log("Using stale cached data due to fetch error")
-			cache.status = "error"
 			return cache.data
 		}

-		// No cache available, return empty array
-		return []
+		// No cache available, return empty object
+		return {}
 	}
 }
-
-export function getCachedModels(): HuggingFaceModel[] | null {
-	return cache?.data || null
-}
-
-export function clearCache(): void {
-	cache = null
-}

src/api/providers/fetchers/modelCache.ts

Lines changed: 4 additions & 0 deletions
@@ -17,6 +17,7 @@ import { getLiteLLMModels } from "./litellm"
 import { GetModelsOptions } from "../../../shared/api"
 import { getOllamaModels } from "./ollama"
 import { getLMStudioModels } from "./lmstudio"
+import { getHuggingFaceModels } from "./huggingface"

 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })

@@ -78,6 +79,9 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord>
 		case "lmstudio":
 			models = await getLMStudioModels(options.baseUrl)
 			break
+		case "huggingface":
+			models = await getHuggingFaceModels()
+			break
 		default: {
 			// Ensures router is exhaustively checked if RouterName is a strict union
 			const exhaustiveCheck: never = provider
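With this case wired in, callers no longer need a HuggingFace-specific module. A minimal usage sketch, assuming `{ provider: "huggingface" }` is a valid `GetModelsOptions` member per Step 2 of the plan:

```typescript
import { getModels } from "./modelCache"

async function main() {
	// Repeat calls within the TTL are served from the NodeCache memory
	// cache; otherwise getModels() falls through to the fetcher.
	const models = await getModels({ provider: "huggingface" })
	console.log(`${Object.keys(models).length} HuggingFace models available`)
}

main()
```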

src/api/providers/huggingface.ts

Lines changed: 38 additions & 31 deletions
@@ -1,38 +1,46 @@
 import OpenAI from "openai"
 import { Anthropic } from "@anthropic-ai/sdk"

-import type { ApiHandlerOptions } from "../../shared/api"
+import { type ModelInfo } from "@roo-code/types"
+
+import type { ApiHandlerOptions, ModelRecord } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { DEFAULT_HEADERS } from "./constants"
-import { BaseProvider } from "./base-provider"
-
-export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler {
-	private client: OpenAI
-	private options: ApiHandlerOptions
+import { RouterProvider } from "./router-provider"
+
+// Default model info for fallback
+const huggingFaceDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 131072,
+	supportsImages: false,
+	supportsPromptCache: false,
+}

+export class HuggingFaceHandler extends RouterProvider implements SingleCompletionHandler {
 	constructor(options: ApiHandlerOptions) {
-		super()
-		this.options = options
+		super({
+			options,
+			name: "huggingface",
+			baseURL: "https://router.huggingface.co/v1",
+			apiKey: options.huggingFaceApiKey,
+			modelId: options.huggingFaceModelId,
+			defaultModelId: "meta-llama/Llama-3.3-70B-Instruct",
+			defaultModelInfo: huggingFaceDefaultModelInfo,
+		})

 		if (!this.options.huggingFaceApiKey) {
 			throw new Error("Hugging Face API key is required")
 		}
-
-		this.client = new OpenAI({
-			baseURL: "https://router.huggingface.co/v1",
-			apiKey: this.options.huggingFaceApiKey,
-			defaultHeaders: DEFAULT_HEADERS,
-		})
 	}

 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
+		const { id: modelId, info } = await this.fetchModel()
 		const temperature = this.options.modelTemperature ?? 0.7

 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
@@ -43,6 +51,11 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 			stream_options: { include_usage: true },
 		}

+		// Add max_tokens if the model info specifies it
+		if (info.maxTokens && info.maxTokens > 0) {
+			params.max_tokens = info.maxTokens
+		}
+
 		const stream = await this.client.chat.completions.create(params)

 		for await (const chunk of stream) {
@@ -66,13 +79,20 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 	}

 	async completePrompt(prompt: string): Promise<string> {
-		const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
+		const { id: modelId, info } = await this.fetchModel()

 		try {
-			const response = await this.client.chat.completions.create({
+			const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: modelId,
 				messages: [{ role: "user", content: prompt }],
-			})
+			}
+
+			// Add max_tokens if the model info specifies it
+			if (info.maxTokens && info.maxTokens > 0) {
+				params.max_tokens = info.maxTokens
+			}
+
+			const response = await this.client.chat.completions.create(params)

 			return response.choices[0]?.message.content || ""
 		} catch (error) {
@@ -83,17 +103,4 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 			throw error
 		}
 	}
-
-	override getModel() {
-		const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
-		return {
-			id: modelId,
-			info: {
-				maxTokens: 8192,
-				contextWindow: 131072,
-				supportsImages: false,
-				supportsPromptCache: false,
-			},
-		}
-	}
 }
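For reference, a minimal sketch of how the refactored handler might be exercised; the options object is illustrative (only `huggingFaceApiKey` is required by the constructor above, and `huggingFaceModelId` falls back to the default model):

```typescript
import { HuggingFaceHandler } from "./src/api/providers/huggingface"

async function demo() {
	const handler = new HuggingFaceHandler({
		huggingFaceApiKey: process.env.HF_API_KEY!,
		huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct",
	})

	// completePrompt() now resolves the model id and ModelInfo via
	// fetchModel() and the shared model cache before issuing the request.
	const text = await handler.completePrompt("Say hello")
	console.log(text)
}

demo()
```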
