diff --git a/packages/inference/README.md b/packages/inference/README.md index ad5c9fc1a6..5178d14e2e 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -1,7 +1,7 @@ # 🤗 Hugging Face Inference -A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and all supported Inference Providers. -It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with all supported third-party Inference Providers. +A Typescript powered wrapper for Inference Providers (serverless) and Inference Endpoints (dedicated). +It works with [Inference Providers (serverless)](https://huggingface.co/docs/api-inference/index) – including all supported third-party Inference Providers – and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index). Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README). @@ -25,20 +25,20 @@ yarn add @huggingface/inference ```ts // esm.sh -import { InferenceClient } from "https://esm.sh/@huggingface/inference" +import { InferenceClient } from "https://esm.sh/@huggingface/inference"; // or npm: -import { InferenceClient } from "npm:@huggingface/inference" +import { InferenceClient } from "npm:@huggingface/inference"; ``` ### Initialize ```typescript -import { InferenceClient } from '@huggingface/inference' +import { InferenceClient } from '@huggingface/inference'; -const hf = new InferenceClient('your access token') +const hf = new InferenceClient('your access token'); ``` -❗**Important note:** Using an access token is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**. 
+❗**Important note:** Always pass an access token. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**. Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token. diff --git a/packages/mcp-client/cli.ts b/packages/mcp-client/cli.ts index 713de3b236..a838689dd6 100644 --- a/packages/mcp-client/cli.ts +++ b/packages/mcp-client/cli.ts @@ -11,6 +11,7 @@ import { version as packageVersion } from "./package.json"; const MODEL_ID = process.env.MODEL_ID ?? "Qwen/Qwen2.5-72B-Instruct"; const PROVIDER = (process.env.PROVIDER as InferenceProvider) ?? "nebius"; +const BASE_URL = process.env.BASE_URL; const SERVERS: StdioServerParameters[] = [ { @@ -48,12 +49,21 @@ async function main() { process.exit(1); } - const agent = new Agent({ - provider: PROVIDER, - model: MODEL_ID, - apiKey: process.env.HF_TOKEN, - servers: SERVERS, - }); + const agent = new Agent( + BASE_URL + ? 
{ + baseUrl: BASE_URL, + model: MODEL_ID, + apiKey: process.env.HF_TOKEN, + servers: SERVERS, + } + : { + provider: PROVIDER, + model: MODEL_ID, + apiKey: process.env.HF_TOKEN, + servers: SERVERS, + } + ); const rl = readline.createInterface({ input: stdin, output: stdout }); let abortController = new AbortController(); diff --git a/packages/mcp-client/src/Agent.ts b/packages/mcp-client/src/Agent.ts index be4aeb3bba..4dfb1e2f5c 100644 --- a/packages/mcp-client/src/Agent.ts +++ b/packages/mcp-client/src/Agent.ts @@ -49,18 +49,28 @@ export class Agent extends McpClient { constructor({ provider, + baseUrl, model, apiKey, servers, prompt, - }: { - provider: InferenceProvider; + }: ( + | { + provider: InferenceProvider; + baseUrl?: undefined; + } + | { + baseUrl: string; + provider?: undefined; + } + ) & { model: string; apiKey: string; servers: StdioServerParameters[]; prompt?: string; }) { - super({ provider, model, apiKey }); + super(provider ? { provider, baseUrl, model, apiKey } : { provider, baseUrl, model, apiKey }); + /// ^This shenanigan is just here to please an overzealous TS type-checker. 
this.servers = servers; this.messages = [ { diff --git a/packages/mcp-client/src/McpClient.ts b/packages/mcp-client/src/McpClient.ts index 93f9511949..42e6eb957b 100644 --- a/packages/mcp-client/src/McpClient.ts +++ b/packages/mcp-client/src/McpClient.ts @@ -2,7 +2,7 @@ import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import type { StdioServerParameters } from "@modelcontextprotocol/sdk/client/stdio.js"; import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; import { InferenceClient } from "@huggingface/inference"; -import type { InferenceProvider } from "@huggingface/inference"; +import type { InferenceClientEndpoint, InferenceProvider } from "@huggingface/inference"; import type { ChatCompletionInputMessage, ChatCompletionInputTool, @@ -22,14 +22,32 @@ export interface ChatCompletionInputMessageTool extends ChatCompletionInputMessa } export class McpClient { - protected client: InferenceClient; - protected provider: string; + protected client: InferenceClient | InferenceClientEndpoint; + protected provider: string | undefined; + protected model: string; private clients: Map = new Map(); public readonly availableTools: ChatCompletionInputTool[] = []; - constructor({ provider, model, apiKey }: { provider: InferenceProvider; model: string; apiKey: string }) { - this.client = new InferenceClient(apiKey); + constructor({ + provider, + baseUrl, + model, + apiKey, + }: ( + | { + provider: InferenceProvider; + baseUrl?: undefined; + } + | { + baseUrl: string; + provider?: undefined; + } + ) & { + model: string; + apiKey: string; + }) { + this.client = baseUrl ? new InferenceClient(apiKey).endpoint(baseUrl) : new InferenceClient(apiKey); this.provider = provider; this.model = model; }