From e0e5416bf9abe2cd8a2eb3cc6961647f3216a845 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 25 Jul 2025 07:27:58 +0000
Subject: [PATCH 1/7] refactor: align HuggingFace provider with established
 pattern

- Move huggingface-models.ts from src/services/ to src/api/providers/fetchers/huggingface.ts
- Update fetcher to return ModelInfo records instead of raw HuggingFace models
- Add HuggingFace to RouterName type and integrate with modelCache.ts
- Update HuggingFace provider to extend RouterProvider base class
- Remove unnecessary src/api/huggingface-models.ts wrapper
- Update webviewMessageHandler to use the new pattern with getModels()
- Maintain backward compatibility with webview by transforming ModelInfo to expected format
---
 huggingface-refactor-plan.md              | 83 ++++++++++++++++++
 src/api/huggingface-models.ts             | 17 ----
 .../providers/fetchers/huggingface.ts}    | 84 +++++++++++++------
 src/api/providers/fetchers/modelCache.ts  |  4 +
 src/api/providers/huggingface.ts          | 69 ++++++++-------
 src/core/webview/webviewMessageHandler.ts | 47 ++++++++++-
 src/shared/api.ts                         | 12 ++-
 7 files changed, 239 insertions(+), 77 deletions(-)
 create mode 100644 huggingface-refactor-plan.md
 delete mode 100644 src/api/huggingface-models.ts
 rename src/{services/huggingface-models.ts => api/providers/fetchers/huggingface.ts} (62%)

diff --git a/huggingface-refactor-plan.md b/huggingface-refactor-plan.md
new file mode 100644
index 00000000000..de84d9b0c07
--- /dev/null
+++ b/huggingface-refactor-plan.md
@@ -0,0 +1,83 @@
+# HuggingFace Provider Refactoring Plan
+
+## Overview
+
+The HuggingFace provider implementation needs to be refactored to match the established pattern used by other providers that fetch models via network calls (e.g., OpenRouter, Glama, Ollama, etc.).
+
+## Current Implementation Issues
+
+1. **File locations are incorrect:**
+
+    - `src/services/huggingface-models.ts` - Should be in `src/api/providers/fetchers/`
+    - `src/api/huggingface-models.ts` - Unnecessary wrapper, should be removed
+
+2. **Pattern mismatch:**
+    - Current implementation returns raw HuggingFace model data
+    - Should return `ModelInfo` records like other providers
+    - Not integrated with the `modelCache.ts` system
+    - Provider doesn't use `RouterProvider` base class or `fetchModel` pattern
+
+## Established Pattern (from other providers)
+
+### 1. Fetcher Pattern (`src/api/providers/fetchers/`)
+
+- Fetcher files export a function like `getHuggingFaceModels()` that returns `Record<string, ModelInfo>`
+- Fetchers handle API calls and transform raw data to `ModelInfo` format
+- Example: `getOpenRouterModels()`, `getGlamaModels()`, `getOllamaModels()`
+
+### 2. Provider Pattern (`src/api/providers/`)
+
+- Providers either:
+    - Extend `RouterProvider` and use `fetchModel()` (e.g., Glama)
+    - Implement their own `fetchModel()` pattern (e.g., OpenRouter)
+- Use `getModels()` from `modelCache.ts` to fetch and cache models
+
+### 3. Model Cache Integration
+
+- `RouterName` type includes all providers that use the cache
+- `modelCache.ts` has a switch statement that calls the appropriate fetcher
+- Provides memory and file caching for model lists
+
+## Implementation Steps
+
+### Step 1: Create new fetcher
+
+- Move `src/services/huggingface-models.ts` to `src/api/providers/fetchers/huggingface.ts`
+- Transform the fetcher to return `Record<string, ModelInfo>` instead of raw HuggingFace models
+- Parse HuggingFace model data to extract:
+    - `maxTokens`
+    - `contextWindow`
+    - `supportsImages` (based on pipeline_tag)
+    - `description`
+    - Other relevant `ModelInfo` fields
+
+### Step 2: Update RouterName and modelCache
+
+- Add `"huggingface"` to the `RouterName` type in `src/shared/api.ts`
+- Add HuggingFace case to the switch statement in `modelCache.ts`
+- Update `GetModelsOptions` type to include HuggingFace
+
+### Step 3: Update HuggingFace provider
+
+- Either extend `RouterProvider` or implement `fetchModel()` pattern
+- Use `getModels()` from modelCache to fetch models
+- Remove hardcoded model info from `getModel()`
+
+### Step 4: Update webview integration
+
+- Modify `webviewMessageHandler.ts` to use the new pattern
+- Instead of importing from `src/api/huggingface-models.ts`, use `getModels()` with provider "huggingface"
+- Transform the response to match the expected format for the webview
+
+### Step 5: Cleanup
+
+- Remove `src/api/huggingface-models.ts`
+- Remove the old `src/services/huggingface-models.ts`
+- Update any other imports
+
+## Benefits of this refactoring
+
+1. **Consistency**: HuggingFace will follow the same pattern as other providers
+2. **Caching**: Model lists will be cached in memory and on disk
+3. **Maintainability**: Easier to understand and modify when all providers follow the same pattern
+4. **Type safety**: Better integration with TypeScript types
diff --git a/src/api/huggingface-models.ts b/src/api/huggingface-models.ts
deleted file mode 100644
index ec1915d0e3d..00000000000
--- a/src/api/huggingface-models.ts
+++ /dev/null
@@ -1,17 +0,0 @@
-import { fetchHuggingFaceModels, type HuggingFaceModel } from "../services/huggingface-models"
-
-export interface HuggingFaceModelsResponse {
-	models: HuggingFaceModel[]
-	cached: boolean
-	timestamp: number
-}
-
-export async function getHuggingFaceModels(): Promise<HuggingFaceModelsResponse> {
-	const models = await fetchHuggingFaceModels()
-
-	return {
-		models,
-		cached: false, // We could enhance this to track if data came from cache
-		timestamp: Date.now(),
-	}
-}
diff --git a/src/services/huggingface-models.ts b/src/api/providers/fetchers/huggingface.ts
similarity index 62%
rename from src/services/huggingface-models.ts
rename to src/api/providers/fetchers/huggingface.ts
index 9c0bc406f93..2b2f6312fdb 100644
--- a/src/services/huggingface-models.ts
+++ b/src/api/providers/fetchers/huggingface.ts
@@ -1,3 +1,7 @@
+import axios from "axios"
+import { ModelInfo } from "@roo-code/types"
+import { z } from "zod"
+
 export interface HuggingFaceModel {
 	_id: string
 	id: string
@@ -52,9 +56,8 @@ const BASE_URL = "https://huggingface.co/api/models"
 const CACHE_DURATION = 1000 * 60 * 60 // 1 hour
 
 interface CacheEntry {
-	data: HuggingFaceModel[]
+	data: Record<string, ModelInfo>
 	timestamp: number
-	status: "success" | "partial" | "error"
 }
 
 let cache: CacheEntry | null = null
@@ -95,7 +98,46 @@ const requestInit: RequestInit = {
 	mode: "cors",
 }
 
-export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
+/**
+ * Parse a HuggingFace model into ModelInfo format
+ */
+function parseHuggingFaceModel(model: HuggingFaceModel): ModelInfo {
+	// Extract context window from tokenizer config if available
+	const contextWindow = model.config.tokenizer_config?.model_max_length || 32768 // Default to 32k
+
+	// Determine if model supports images based on pipeline tag
+	const supportsImages = model.pipeline_tag === "image-text-to-text"
+
+	// Create a description from available metadata
+	const description = [
+		model.config.model_type ? `Type: ${model.config.model_type}` : null,
+		model.config.architectures?.length ? `Architecture: ${model.config.architectures[0]}` : null,
+		model.library_name ? `Library: ${model.library_name}` : null,
+		model.inferenceProviderMapping?.length
+			? `Providers: ${model.inferenceProviderMapping.map((p) => p.provider).join(", ")}`
+			: null,
+	]
+		.filter(Boolean)
+		.join(", ")
+
+	const modelInfo: ModelInfo = {
+		maxTokens: Math.min(contextWindow, 8192), // Conservative default, most models support at least 8k output
+		contextWindow,
+		supportsImages,
+		supportsPromptCache: false, // HuggingFace inference API doesn't support prompt caching
+		description,
+		// HuggingFace models through their inference API are generally free
+		inputPrice: 0,
+		outputPrice: 0,
+	}
+
+	return modelInfo
+}
+
+/**
+ * Fetch HuggingFace models and return them in ModelInfo format
+ */
+export async function getHuggingFaceModels(): Promise<Record<string, ModelInfo>> {
 	const now = Date.now()
 
 	// Check cache
@@ -104,6 +146,8 @@ export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
 		return cache.data
 	}
 
+	const models: Record<string, ModelInfo> = {}
+
 	try {
 		console.log("Fetching Hugging Face models from API...")
@@ -115,14 +159,12 @@ export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
 
 		let textGenModels: HuggingFaceModel[] = []
 		let imgTextModels: HuggingFaceModel[] = []
-		let hasErrors = false
 
 		// Process text-generation models
 		if (textGenResponse.status === "fulfilled" && textGenResponse.value.ok) {
 			textGenModels = await textGenResponse.value.json()
 		} else {
 			console.error("Failed to fetch text-generation models:", textGenResponse)
-			hasErrors = true
 		}
 
 		// Process image-text-to-text models
@@ -130,42 +172,36 @@ export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
 			imgTextModels = await imgTextResponse.value.json()
 		} else {
 			console.error("Failed to fetch image-text-to-text models:", imgTextResponse)
-			hasErrors = true
 		}
 
 		// Combine and filter models
-		const allModels = [...textGenModels, ...imgTextModels]
-			.filter((model) => model.inferenceProviderMapping.length > 0)
-			.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()))
+		const allModels = [...textGenModels, ...imgTextModels].filter(
+			(model) => model.inferenceProviderMapping.length > 0,
+		)
+
+		// Convert to ModelInfo format
+		for (const model of allModels) {
+			models[model.id] = parseHuggingFaceModel(model)
+		}
 
 		// Update cache
 		cache = {
-			data: allModels,
+			data: models,
 			timestamp: now,
"partial" : "success", } - console.log(`Fetched ${allModels.length} Hugging Face models (status: ${cache.status})`) - return allModels + console.log(`Fetched ${Object.keys(models).length} Hugging Face models`) + return models } catch (error) { console.error("Error fetching Hugging Face models:", error) // Return cached data if available if (cache) { console.log("Using stale cached data due to fetch error") - cache.status = "error" return cache.data } - // No cache available, return empty array - return [] + // No cache available, return empty object + return {} } } - -export function getCachedModels(): HuggingFaceModel[] | null { - return cache?.data || null -} - -export function clearCache(): void { - cache = null -} diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts index fef700268dc..0eb35eedba0 100644 --- a/src/api/providers/fetchers/modelCache.ts +++ b/src/api/providers/fetchers/modelCache.ts @@ -17,6 +17,7 @@ import { getLiteLLMModels } from "./litellm" import { GetModelsOptions } from "../../../shared/api" import { getOllamaModels } from "./ollama" import { getLMStudioModels } from "./lmstudio" +import { getHuggingFaceModels } from "./huggingface" const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 }) @@ -78,6 +79,9 @@ export const getModels = async (options: GetModelsOptions): Promise case "lmstudio": models = await getLMStudioModels(options.baseUrl) break + case "huggingface": + models = await getHuggingFaceModels() + break default: { // Ensures router is exhaustively checked if RouterName is a strict union const exhaustiveCheck: never = provider diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 913605bd929..3370e764d0e 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -1,30 +1,38 @@ import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" -import type { ApiHandlerOptions } from "../../shared/api" +import { type ModelInfo } from "@roo-code/types" + +import type { ApiHandlerOptions, ModelRecord } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { DEFAULT_HEADERS } from "./constants" -import { BaseProvider } from "./base-provider" - -export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler { - private client: OpenAI - private options: ApiHandlerOptions +import { RouterProvider } from "./router-provider" + +// Default model info for fallback +const huggingFaceDefaultModelInfo: ModelInfo = { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, +} +export class HuggingFaceHandler extends RouterProvider implements SingleCompletionHandler { constructor(options: ApiHandlerOptions) { - super() - this.options = options + super({ + options, + name: "huggingface", + baseURL: "https://router.huggingface.co/v1", + apiKey: options.huggingFaceApiKey, + modelId: options.huggingFaceModelId, + defaultModelId: "meta-llama/Llama-3.3-70B-Instruct", + defaultModelInfo: huggingFaceDefaultModelInfo, + }) if (!this.options.huggingFaceApiKey) { throw new Error("Hugging Face API key is required") } - - this.client = new OpenAI({ - baseURL: "https://router.huggingface.co/v1", - apiKey: this.options.huggingFaceApiKey, - defaultHeaders: DEFAULT_HEADERS, - }) } override async *createMessage( @@ 
@@ -32,7 +40,7 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
+		const { id: modelId, info } = await this.fetchModel()
 		const temperature = this.options.modelTemperature ?? 0.7
 
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
@@ -43,6 +51,11 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 			stream_options: { include_usage: true },
 		}
 
+		// Add max_tokens if the model info specifies it
+		if (info.maxTokens && info.maxTokens > 0) {
+			params.max_tokens = info.maxTokens
+		}
+
 		const stream = await this.client.chat.completions.create(params)
 
 		for await (const chunk of stream) {
@@ -66,13 +79,20 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
-		const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
+		const { id: modelId, info } = await this.fetchModel()
 
 		try {
-			const response = await this.client.chat.completions.create({
+			const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: modelId,
 				messages: [{ role: "user", content: prompt }],
-			})
+			}
+
+			// Add max_tokens if the model info specifies it
+			if (info.maxTokens && info.maxTokens > 0) {
+				params.max_tokens = info.maxTokens
+			}
+
+			const response = await this.client.chat.completions.create(params)
 
 			return response.choices[0]?.message.content || ""
 		} catch (error) {
@@ -83,17 +103,4 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 			throw error
 		}
 	}
-
-	override getModel() {
-		const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
-		return {
-			id: modelId,
-			info: {
-				maxTokens: 8192,
-				contextWindow: 131072,
-				supportsImages: false,
-				supportsPromptCache: false,
-			},
-		}
-	}
 }
diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index c739c2ade8d..3530c9d3604 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -522,6 +522,7 @@ export const webviewMessageHandler = async (
 		litellm: {},
 		ollama: {},
 		lmstudio: {},
+		huggingface: {},
 	}
 
 	const safeGetModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
@@ -564,9 +565,10 @@ export const webviewMessageHandler = async (
 	const fetchedRouterModels: Partial<Record<RouterName, ModelRecord>> = {
 		...routerModels,
-		// Initialize ollama and lmstudio with empty objects since they use separate handlers
+		// Initialize ollama, lmstudio, and huggingface with empty objects since they use separate handlers
 		ollama: {},
 		lmstudio: {},
+		huggingface: {},
 	}
 
 	results.forEach((result, index) => {
@@ -676,11 +678,48 @@ export const webviewMessageHandler = async (
 			break
 		case "requestHuggingFaceModels":
 			try {
-				const { getHuggingFaceModels } = await import("../../api/huggingface-models")
-				const huggingFaceModelsResponse = await getHuggingFaceModels()
+				// Flush cache first to ensure fresh models
+				await flushModels("huggingface")
+
+				const huggingFaceModels = await getModels({
+					provider: "huggingface",
+				})
+
+				// Convert the model record to an array format expected by the webview
+				const modelArray = Object.entries(huggingFaceModels).map(([id, info]) => ({
+					id,
+					_id: id,
+					inferenceProviderMapping: [
+						{
+							provider: "huggingface",
+							providerId: id,
+							status: "live" as const,
+							task: "conversational" as const,
+						},
+					],
+					trendingScore: 0,
+					config: {
+						architectures: [],
+						model_type:
+							info.description
+								?.split(", ")
+								.find((part) => part.startsWith("Type: "))
+								?.replace("Type: ", "") || "",
+						tokenizer_config: {
+							model_max_length: info.contextWindow,
+						},
+					},
+					tags: [],
+					pipeline_tag: info.supportsImages ? ("image-text-to-text" as const) : ("text-generation" as const),
+					library_name: info.description
+						?.split(", ")
+						.find((part) => part.startsWith("Library: "))
+						?.replace("Library: ", ""),
+				}))
+
 				provider.postMessageToWebview({
 					type: "huggingFaceModels",
-					huggingFaceModels: huggingFaceModelsResponse.models,
+					huggingFaceModels: modelArray,
 				})
 			} catch (error) {
 				console.error("Failed to fetch Hugging Face models:", error)
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 8cbfc721336..705e5d832fd 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -11,7 +11,16 @@ export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider">
 
 // RouterName
 
-const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm", "ollama", "lmstudio"] as const
+const routerNames = [
+	"openrouter",
+	"requesty",
+	"glama",
+	"unbound",
+	"litellm",
+	"ollama",
+	"lmstudio",
+	"huggingface",
+] as const
 
 export type RouterName = (typeof routerNames)[number]
 
@@ -113,3 +122,4 @@ export type GetModelsOptions =
 	| { provider: "litellm"; apiKey: string; baseUrl: string }
 	| { provider: "ollama"; baseUrl?: string }
 	| { provider: "lmstudio"; baseUrl?: string }
+	| { provider: "huggingface" }

From 39927850e7a8699f7d5b8df480490ea02126ad2e Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 25 Jul 2025 07:29:17 +0000
Subject: [PATCH 2/7] fix: add huggingface to RouterModels mock in webview
 tests

---
 webview-ui/src/utils/__tests__/validate.test.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/webview-ui/src/utils/__tests__/validate.test.ts b/webview-ui/src/utils/__tests__/validate.test.ts
index 3a60c27f8ad..01452995bc1 100644
--- a/webview-ui/src/utils/__tests__/validate.test.ts
+++ b/webview-ui/src/utils/__tests__/validate.test.ts
@@ -38,6 +38,7 @@ describe("Model Validation Functions", () => {
 		litellm: {},
 		ollama: {},
 		lmstudio: {},
+		huggingface: {},
 	}
 
 	const allowAllOrganization: OrganizationAllowList = {

From e63c1db90ef7eed35d58b43b6cdc3535db171cb5 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 25 Jul 2025 07:31:40 +0000
Subject: [PATCH 3/7] chore: add PR body file

---
 pr-body.md | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 pr-body.md

diff --git a/pr-body.md b/pr-body.md
new file mode 100644
index 00000000000..e8311bf867d
--- /dev/null
+++ b/pr-body.md
@@ -0,0 +1,26 @@
+## Summary
+
+This PR refactors the HuggingFace provider implementation to match the established pattern used by other providers that fetch models via network calls (e.g., OpenRouter, Glama, Ollama).
+
+## Changes
+
+- **Moved fetcher to correct location**: Moved `huggingface-models.ts` from `src/services/` to `src/api/providers/fetchers/huggingface.ts`
+- **Updated fetcher to return ModelInfo**: The fetcher now returns `Record<string, ModelInfo>` instead of raw HuggingFace model data, consistent with other providers
+- **Integrated with model cache**: Added HuggingFace to `RouterName` type and integrated it with the `modelCache.ts` system for memory and file caching
+- **Updated provider to extend RouterProvider**: The HuggingFace provider now extends the `RouterProvider` base class and uses the `fetchModel()` pattern
+- **Removed unnecessary wrapper**: Deleted `src/api/huggingface-models.ts` as it's no longer needed
+- **Updated webview integration**: Modified `webviewMessageHandler.ts` to use the new pattern with `getModels()` while maintaining backward compatibility
+
+## Benefits
+
+1. **Consistency**: HuggingFace now follows the same pattern as other providers
+2. **Caching**: Model lists are now cached in memory and on disk
+3. **Maintainability**: Easier to understand and modify when all providers follow the same pattern
+4. **Type safety**: Better integration with TypeScript types
+
+## Testing
+
+- ✅ All existing tests pass
+- ✅ TypeScript compilation successful
+- ✅ Linting checks pass
+- ✅ Added HuggingFace to RouterModels mock in webview tests

From 779872e71a24d38b9aa83a5de4a0899719cc1656 Mon Sep 17 00:00:00 2001
From: Chris Estreich
Date: Fri, 25 Jul 2025 00:32:42 -0700
Subject: [PATCH 4/7] Delete pr-body.md

---
 pr-body.md | 26 --------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 pr-body.md

diff --git a/pr-body.md b/pr-body.md
deleted file mode 100644
index e8311bf867d..00000000000
--- a/pr-body.md
+++ /dev/null
@@ -1,26 +0,0 @@
-## Summary
-
-This PR refactors the HuggingFace provider implementation to match the established pattern used by other providers that fetch models via network calls (e.g., OpenRouter, Glama, Ollama).
-
-## Changes
-
-- **Moved fetcher to correct location**: Moved `huggingface-models.ts` from `src/services/` to `src/api/providers/fetchers/huggingface.ts`
-- **Updated fetcher to return ModelInfo**: The fetcher now returns `Record<string, ModelInfo>` instead of raw HuggingFace model data, consistent with other providers
-- **Integrated with model cache**: Added HuggingFace to `RouterName` type and integrated it with the `modelCache.ts` system for memory and file caching
-- **Updated provider to extend RouterProvider**: The HuggingFace provider now extends the `RouterProvider` base class and uses the `fetchModel()` pattern
-- **Removed unnecessary wrapper**: Deleted `src/api/huggingface-models.ts` as it's no longer needed
-- **Updated webview integration**: Modified `webviewMessageHandler.ts` to use the new pattern with `getModels()` while maintaining backward compatibility
-
-## Benefits
-
-1. **Consistency**: HuggingFace now follows the same pattern as other providers
-2. **Caching**: Model lists are now cached in memory and on disk
-3. **Maintainability**: Easier to understand and modify when all providers follow the same pattern
-4. **Type safety**: Better integration with TypeScript types
-
-## Testing
-
-- ✅ All existing tests pass
-- ✅ TypeScript compilation successful
-- ✅ Linting checks pass
-- ✅ Added HuggingFace to RouterModels mock in webview tests

From af4d723d54507006dfe899091b08900e54b2e6b9 Mon Sep 17 00:00:00 2001
From: Chris Estreich
Date: Fri, 25 Jul 2025 00:32:56 -0700
Subject: [PATCH 5/7] Delete huggingface-refactor-plan.md

---
 huggingface-refactor-plan.md | 83 ------------------------------------
 1 file changed, 83 deletions(-)
 delete mode 100644 huggingface-refactor-plan.md

diff --git a/huggingface-refactor-plan.md b/huggingface-refactor-plan.md
deleted file mode 100644
index de84d9b0c07..00000000000
--- a/huggingface-refactor-plan.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# HuggingFace Provider Refactoring Plan
-
-## Overview
-
-The HuggingFace provider implementation needs to be refactored to match the established pattern used by other providers that fetch models via network calls (e.g., OpenRouter, Glama, Ollama, etc.).
-
-## Current Implementation Issues
-
-1. **File locations are incorrect:**
-
-    - `src/services/huggingface-models.ts` - Should be in `src/api/providers/fetchers/`
-    - `src/api/huggingface-models.ts` - Unnecessary wrapper, should be removed
-
-2. **Pattern mismatch:**
-    - Current implementation returns raw HuggingFace model data
-    - Should return `ModelInfo` records like other providers
-    - Not integrated with the `modelCache.ts` system
-    - Provider doesn't use `RouterProvider` base class or `fetchModel` pattern
-
-## Established Pattern (from other providers)
-
-### 1. Fetcher Pattern (`src/api/providers/fetchers/`)
-
-- Fetcher files export a function like `getHuggingFaceModels()` that returns `Record<string, ModelInfo>`
-- Fetchers handle API calls and transform raw data to `ModelInfo` format
-- Example: `getOpenRouterModels()`, `getGlamaModels()`, `getOllamaModels()`
-
-### 2. Provider Pattern (`src/api/providers/`)
-
-- Providers either:
-    - Extend `RouterProvider` and use `fetchModel()` (e.g., Glama)
-    - Implement their own `fetchModel()` pattern (e.g., OpenRouter)
-- Use `getModels()` from `modelCache.ts` to fetch and cache models
-
-### 3. Model Cache Integration
-
-- `RouterName` type includes all providers that use the cache
-- `modelCache.ts` has a switch statement that calls the appropriate fetcher
-- Provides memory and file caching for model lists
-
-## Implementation Steps
-
-### Step 1: Create new fetcher
-
-- Move `src/services/huggingface-models.ts` to `src/api/providers/fetchers/huggingface.ts`
-- Transform the fetcher to return `Record<string, ModelInfo>` instead of raw HuggingFace models
-- Parse HuggingFace model data to extract:
-    - `maxTokens`
-    - `contextWindow`
-    - `supportsImages` (based on pipeline_tag)
-    - `description`
-    - Other relevant `ModelInfo` fields
-
-### Step 2: Update RouterName and modelCache
-
-- Add `"huggingface"` to the `RouterName` type in `src/shared/api.ts`
-- Add HuggingFace case to the switch statement in `modelCache.ts`
-- Update `GetModelsOptions` type to include HuggingFace
-
-### Step 3: Update HuggingFace provider
-
-- Either extend `RouterProvider` or implement `fetchModel()` pattern
-- Use `getModels()` from modelCache to fetch models
-- Remove hardcoded model info from `getModel()`
-
-### Step 4: Update webview integration
-
-- Modify `webviewMessageHandler.ts` to use the new pattern
-- Instead of importing from `src/api/huggingface-models.ts`, use `getModels()` with provider "huggingface"
-- Transform the response to match the expected format for the webview
-
-### Step 5: Cleanup
-
-- Remove `src/api/huggingface-models.ts`
-- Remove the old `src/services/huggingface-models.ts`
-- Update any other imports
-
-## Benefits of this refactoring
-
-1. **Consistency**: HuggingFace will follow the same pattern as other providers
-2. **Caching**: Model lists will be cached in memory and on disk
-3. **Maintainability**: Easier to understand and modify when all providers follow the same pattern
-4. **Type safety**: Better integration with TypeScript types

From ef28685feae2ea72ff184d0f1cf78d0fd5b568fb Mon Sep 17 00:00:00 2001
From: Chris Estreich
Date: Fri, 25 Jul 2025 00:36:48 -0700
Subject: [PATCH 6/7] Update src/api/providers/fetchers/huggingface.ts

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
---
 src/api/providers/fetchers/huggingface.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/api/providers/fetchers/huggingface.ts b/src/api/providers/fetchers/huggingface.ts
index 2b2f6312fdb..55b17ed9507 100644
--- a/src/api/providers/fetchers/huggingface.ts
+++ b/src/api/providers/fetchers/huggingface.ts
@@ -1,4 +1,3 @@
-import axios from "axios"
 import { ModelInfo } from "@roo-code/types"
 import { z } from "zod"
 

From 86e05ab271700343f2b4267c7721a6b9aa36723b Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 25 Jul 2025 07:47:30 +0000
Subject: [PATCH 7/7] refactor: remove requestHuggingFaceModels and use
 requestRouterModels instead

- Removed requestHuggingFaceModels message type from WebviewMessage.ts
- Removed requestHuggingFaceModels handler from webviewMessageHandler.ts
- Removed huggingFaceModels from ExtensionMessage.ts
- Updated HuggingFace.tsx component to use requestRouterModels and extract HuggingFace models from the routerModels response

This simplifies the codebase by using a single unified message type for all router-based model providers.
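
A condensed sketch of the unified flow, stitched together from the hunks below (illustrative only; the diffs carry the full context):

    // Webview: one request now covers every router-based provider.
    vscode.postMessage({ type: "requestRouterModels" })

    // Extension: the HuggingFace fetcher is resolved through the shared
    // model cache, like any other router.
    const huggingFaceModels = await getModels({ provider: "huggingface" })

    // Webview: the HuggingFace settings panel picks its slice out of the
    // shared "routerModels" response instead of a dedicated message type.
    const record = message.routerModels?.huggingface ?? {}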
---
 src/core/webview/webviewMessageHandler.ts      | 57 +-------------------
 src/shared/ExtensionMessage.ts                 | 23 --------
 src/shared/WebviewMessage.ts                   |  1 -
 .../settings/providers/HuggingFace.tsx         | 44 +++++++++++++-
 4 files changed, 42 insertions(+), 83 deletions(-)

diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index 3530c9d3604..a0e87a49afd 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -522,7 +522,6 @@ export const webviewMessageHandler = async (
 		litellm: {},
 		ollama: {},
 		lmstudio: {},
-		huggingface: {},
 	}
 
 	const safeGetModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
@@ -565,10 +564,9 @@ export const webviewMessageHandler = async (
 	const fetchedRouterModels: Partial<Record<RouterName, ModelRecord>> = {
 		...routerModels,
-		// Initialize ollama, lmstudio, and huggingface with empty objects since they use separate handlers
+		// Initialize ollama and lmstudio with empty objects since they use separate handlers
 		ollama: {},
 		lmstudio: {},
-		huggingface: {},
 	}
 
 	results.forEach((result, index) => {
@@ -676,59 +674,6 @@ export const webviewMessageHandler = async (
 			// TODO: Cache like we do for OpenRouter, etc?
 			provider.postMessageToWebview({ type: "vsCodeLmModels", vsCodeLmModels })
 			break
-		case "requestHuggingFaceModels":
-			try {
-				// Flush cache first to ensure fresh models
-				await flushModels("huggingface")
-
-				const huggingFaceModels = await getModels({
-					provider: "huggingface",
-				})
-
-				// Convert the model record to an array format expected by the webview
-				const modelArray = Object.entries(huggingFaceModels).map(([id, info]) => ({
-					id,
-					_id: id,
-					inferenceProviderMapping: [
-						{
-							provider: "huggingface",
-							providerId: id,
-							status: "live" as const,
-							task: "conversational" as const,
-						},
-					],
-					trendingScore: 0,
-					config: {
-						architectures: [],
-						model_type:
-							info.description
-								?.split(", ")
-								.find((part) => part.startsWith("Type: "))
-								?.replace("Type: ", "") || "",
-						tokenizer_config: {
-							model_max_length: info.contextWindow,
-						},
-					},
-					tags: [],
("image-text-to-text" as const) : ("text-generation" as const), - library_name: info.description - ?.split(", ") - .find((part) => part.startsWith("Library: ")) - ?.replace("Library: ", ""), - })) - - provider.postMessageToWebview({ - type: "huggingFaceModels", - huggingFaceModels: modelArray, - }) - } catch (error) { - console.error("Failed to fetch Hugging Face models:", error) - provider.postMessageToWebview({ - type: "huggingFaceModels", - huggingFaceModels: [], - }) - } - break case "openImage": openImage(message.text!, { values: message.values }) break diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 000762e317a..2eb08ac7cf6 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -67,7 +67,6 @@ export interface ExtensionMessage { | "ollamaModels" | "lmStudioModels" | "vsCodeLmModels" - | "huggingFaceModels" | "vsCodeLmApiAvailable" | "updatePrompt" | "systemPrompt" @@ -137,28 +136,6 @@ export interface ExtensionMessage { ollamaModels?: string[] lmStudioModels?: string[] vsCodeLmModels?: { vendor?: string; family?: string; version?: string; id?: string }[] - huggingFaceModels?: Array<{ - _id: string - id: string - inferenceProviderMapping: Array<{ - provider: string - providerId: string - status: "live" | "staging" | "error" - task: "conversational" - }> - trendingScore: number - config: { - architectures: string[] - model_type: string - tokenizer_config?: { - chat_template?: string | Array<{ name: string; template: string }> - model_max_length?: number - } - } - tags: string[] - pipeline_tag: "text-generation" | "image-text-to-text" - library_name?: string - }> mcpServers?: McpServer[] commits?: GitCommit[] listApiConfig?: ProviderSettingsEntry[] diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 795e2765222..53b4fa92a7e 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -67,7 +67,6 @@ export interface WebviewMessage { | "requestOllamaModels" | "requestLmStudioModels" | "requestVsCodeLmModels" - | "requestHuggingFaceModels" | "openImage" | "saveImage" | "openFile" diff --git a/webview-ui/src/components/settings/providers/HuggingFace.tsx b/webview-ui/src/components/settings/providers/HuggingFace.tsx index d4195492dd7..ee2dc56b53f 100644 --- a/webview-ui/src/components/settings/providers/HuggingFace.tsx +++ b/webview-ui/src/components/settings/providers/HuggingFace.tsx @@ -62,7 +62,7 @@ export const HuggingFace = ({ apiConfiguration, setApiConfigurationField }: Hugg // Fetch models when component mounts useEffect(() => { setLoading(true) - vscode.postMessage({ type: "requestHuggingFaceModels" }) + vscode.postMessage({ type: "requestRouterModels" }) }, []) // Handle messages from extension @@ -70,8 +70,46 @@ export const HuggingFace = ({ apiConfiguration, setApiConfigurationField }: Hugg const message: ExtensionMessage = event.data switch (message.type) { - case "huggingFaceModels": - setModels(message.huggingFaceModels || []) + case "routerModels": + // Extract HuggingFace models from routerModels + if (message.routerModels?.huggingface) { + // Convert from ModelRecord format to HuggingFaceModel array format + const modelArray = Object.entries(message.routerModels.huggingface).map(([id, info]) => ({ + id, + _id: id, + inferenceProviderMapping: [ + { + provider: "huggingface", + providerId: id, + status: "live" as const, + task: "conversational" as const, + }, + ], + trendingScore: 0, + config: { + architectures: [], + model_type: + info.description + ?.split(", ") + 
+									.find((part: string) => part.startsWith("Type: "))
+									?.replace("Type: ", "") || "",
+							tokenizer_config: {
+								model_max_length: info.contextWindow,
+							},
+						},
+						tags: [],
+						pipeline_tag: info.supportsImages
+							? ("image-text-to-text" as const)
+							: ("text-generation" as const),
+						library_name: info.description
+							?.split(", ")
+							.find((part: string) => part.startsWith("Library: "))
+							?.replace("Library: ", ""),
+					}))
+					setModels(modelArray)
+				} else {
+					setModels([])
+				}
 				setLoading(false)
 				break
 			}