19 changes: 10 additions & 9 deletions README.md
@@ -31,6 +31,7 @@ await uploadFile({

await inference.chatCompletion({
model: "meta-llama/Llama-3.1-8B-Instruct",
provider: "sambanova", // or together, fal-ai, replicate, cohere …
messages: [
{
role: "user",
@@ -39,11 +40,11 @@ await inference.chatCompletion({
],
max_tokens: 512,
temperature: 0.5,
provider: "sambanova", // or together, fal-ai, replicate, cohere …
});

await inference.textToImage({
model: "black-forest-labs/FLUX.1-dev",
provider: "replicate",
inputs: "a picture of a green bird",
});

@@ -54,7 +55,7 @@ await inference.textToImage({

This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.

- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and third-party Inference Providers to make calls to 100,000+ Machine Learning models
- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
- [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
- [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
- [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -84,7 +85,7 @@ npm install @huggingface/agents
Then import the libraries in your code:

```ts
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";
import { HfAgent } from "@huggingface/agents";
import { createRepo, commit, deleteRepo, listFiles } from "@huggingface/hub";
import type { RepoId } from "@huggingface/hub";
@@ -96,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

```html
<script type="module">
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm';
import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm';
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
</script>
```
@@ -105,12 +106,12 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

```ts
// esm.sh
import { HfInference } from "https://esm.sh/@huggingface/inference"
import { InferenceClient } from "https://esm.sh/@huggingface/inference"
import { HfAgent } from "https://esm.sh/@huggingface/agents";

import { createRepo, commit, deleteRepo, listFiles } from "https://esm.sh/@huggingface/hub"
// or npm:
import { HfInference } from "npm:@huggingface/inference"
import { InferenceClient } from "npm:@huggingface/inference"
import { HfAgent } from "npm:@huggingface/agents";

import { createRepo, commit, deleteRepo, listFiles } from "npm:@huggingface/hub"
@@ -123,11 +124,11 @@ Get your HF access token in your [account settings](https://huggingface.co/setti
### @huggingface/inference examples

```ts
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";

const HF_TOKEN = "hf_...";

const inference = new HfInference(HF_TOKEN);
const inference = new InferenceClient(HF_TOKEN);

// Chat completion API
const out = await inference.chatCompletion({
@@ -179,7 +180,7 @@ await inference.imageToText({

// Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });

// Chat Completion
const llamaEndpoint = inference.endpoint(
4 changes: 2 additions & 2 deletions e2e/deno/index.ts
@@ -1,4 +1,4 @@
import { HfInference } from "npm:@huggingface/inference@*";
import { InferenceClient } from "npm:@huggingface/inference@*";
import { whoAmI, listFiles } from "npm:@huggingface/hub@*";

const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
@@ -10,7 +10,7 @@ for await (const file of listFiles({ repo: "gpt2" })) {

const token = Deno.env.get("HF_TOKEN");
if (token) {
const hf = new HfInference(token);
const hf = new InferenceClient(token);

const tokenInfo = await whoAmI({ credentials: { accessToken: token } });
console.log(tokenInfo);
4 changes: 2 additions & 2 deletions e2e/svelte/src/routes/+page.svelte
@@ -1,8 +1,8 @@
<script>
import { whoAmI, listFiles } from "@huggingface/hub";
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";

const hf = new HfInference();
const hf = new InferenceClient();

const test = async () => {
const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
4 changes: 2 additions & 2 deletions e2e/ts/src/index.ts
@@ -1,9 +1,9 @@
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";
import { whoAmI } from "@huggingface/hub";

const hfToken = process.env.token;

const hf = new HfInference(hfToken);
const hf = new InferenceClient(hfToken);

(async () => {
const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
4 changes: 2 additions & 2 deletions packages/agents/src/lib/evalBuilder.ts
@@ -1,4 +1,4 @@
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";
import type { Data, Tool } from "../types";

// this function passes the tools & files to the context before calling eval
@@ -17,7 +17,7 @@ export async function evalBuilder(

// add tools to context
for (const tool of tools) {
const toolCall = (input: Promise<Data>) => tool.call?.(input, new HfInference(accessToken ?? ""));
const toolCall = (input: Promise<Data>) => tool.call?.(input, new InferenceClient(accessToken ?? ""));
// @ts-expect-error adding to the scope
globalThis[tool.name] = toolCall;
}
6 changes: 3 additions & 3 deletions packages/agents/src/llms/LLMHF.ts
@@ -1,8 +1,8 @@
import type { LLM } from "../types";
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";

export function LLMFromHub(accessToken?: string, model?: string): LLM {
const inference = new HfInference(accessToken);
const inference = new InferenceClient(accessToken);

return async (prompt: string): Promise<string> => {
const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";
@@ -20,7 +20,7 @@ export function LLMFromHub(accessToken?: string, model?: string): LLM {
}

export function LLMFromEndpoint(accessToken: string, endpoint: string): LLM {
const inference = new HfInference(accessToken).endpoint(endpoint);
const inference = new InferenceClient(accessToken).endpoint(endpoint);
return async (prompt: string): Promise<string> => {
const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";

4 changes: 2 additions & 2 deletions packages/agents/src/types.d.ts
@@ -1,12 +1,12 @@
import type { HfInference } from "@huggingface/inference";
import type { InferenceClient } from "@huggingface/inference";

export type Data = string | Blob | ArrayBuffer;

export interface Tool {
name: string;
description: string;
examples: Array<Example>;
call?: (input: Promise<Data>, inference: HfInference) => Promise<Data>;
call?: (input: Promise<Data>, inference: InferenceClient) => Promise<Data>;
}

export interface Example {
4 changes: 2 additions & 2 deletions packages/agents/test/HfAgent.spec.ts
@@ -1,7 +1,7 @@
import { describe, expect, it } from "vitest";
import { HfAgent, defaultTools, LLMFromHub, LLMFromEndpoint } from "../src";
import type { Data } from "../src/types";
import type { HfInference } from "@huggingface/inference";
import type { InferenceClient } from "@huggingface/inference";

const env = import.meta.env;
if (!env.HF_TOKEN) {
@@ -33,7 +33,7 @@ describe("HfAgent", () => {
},
],
// eslint-disable-next-line @typescript-eslint/no-unused-vars
call: async (input: Promise<Data>, inference: HfInference): Promise<Data> => {
call: async (input: Promise<Data>, inference: InferenceClient): Promise<Data> => {
const data = await input;
if (typeof data !== "string") {
throw new Error("Input must be a string");
22 changes: 11 additions & 11 deletions packages/inference/README.md
@@ -1,7 +1,7 @@
# 🤗 Hugging Face Inference

A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers.
It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers.
A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and all supported Inference Providers.
It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with all supported third-party Inference Providers.

Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).

@@ -25,24 +25,24 @@ yarn add @huggingface/inference

```ts
// esm.sh
import { HfInference } from "https://esm.sh/@huggingface/inference"
import { InferenceClient } from "https://esm.sh/@huggingface/inference"
// or npm:
import { HfInference } from "npm:@huggingface/inference"
import { InferenceClient } from "npm:@huggingface/inference"
```

### Initialize

```typescript
import { HfInference } from '@huggingface/inference'
import { InferenceClient } from '@huggingface/inference'

const hf = new HfInference('your access token')
const hf = new InferenceClient('your access token')
```

❗**Important note:** Using an access token is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**.

Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
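
One possible shape for such a proxy, sketched under stated assumptions (Node 18+ with global fetch, the token in a server-side HF_TOKEN env var; the upstream host, route handling, and port are illustrative, not part of this library):

```ts
import { createServer } from "node:http";

const HF_TOKEN = process.env.HF_TOKEN; // stays server-side; never shipped to the browser

createServer(async (req, res) => {
  // Buffer the incoming request body so it can be re-sent upstream.
  const chunks: Buffer[] = [];
  for await (const chunk of req) chunks.push(chunk as Buffer);

  // Forward the call to the serverless Inference API, attaching the token here.
  const upstream = await fetch(`https://api-inference.huggingface.co${req.url ?? "/"}`, {
    method: req.method,
    headers: {
      Authorization: `Bearer ${HF_TOKEN}`,
      "Content-Type": String(req.headers["content-type"] ?? "application/json"),
    },
    body: req.method === "GET" || req.method === "HEAD" ? undefined : Buffer.concat(chunks),
  });

  // Relay status and body back to the browser.
  res.writeHead(upstream.status, {
    "Content-Type": upstream.headers.get("content-type") ?? "application/json",
  });
  res.end(Buffer.from(await upstream.arrayBuffer()));
}).listen(3000);
```

The front-end then calls the proxy's URL instead of huggingface.co, so the token never reaches the browser.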

### Third-party inference providers
### All supported inference providers

You can send inference requests to third-party providers with the inference client.

@@ -63,7 +63,7 @@ To send requests to a third-party provider, you have to pass the `provider` para
```ts
const accessToken = "hf_..."; // Either a HF access token, or an API key from the third-party provider (Replicate in this example)

const client = new HfInference(accessToken);
const client = new InferenceClient(accessToken);
await client.textToImage({
provider: "replicate",
model:"black-forest-labs/Flux.1-dev",
@@ -93,7 +93,7 @@ This is not an issue for LLMs as everyone converged on the OpenAI API anyways, b

### Tree-shaking

You can import the functions you need directly from the module instead of using the `HfInference` class.
You can import the functions you need directly from the module instead of using the `InferenceClient` class.

```ts
import { textGeneration } from "@huggingface/inference";
@@ -165,7 +165,7 @@ for await (const chunk of hf.chatCompletionStream({
It's also possible to call Mistral or OpenAI endpoints directly:

```typescript
const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
const openai = new InferenceClient(OPENAI_TOKEN).endpoint("https://api.openai.com");

let out = "";
for await (const chunk of openai.chatCompletionStream({
@@ -602,7 +602,7 @@ You can use any Chat Completion API-compatible provider with the `chatCompletion
```typescript
// Chat Completion Example
const MISTRAL_KEY = process.env.MISTRAL_KEY;
const hf = new HfInference(MISTRAL_KEY);
const hf = new InferenceClient(MISTRAL_KEY);
const ep = hf.endpoint("https://api.mistral.ai");
const stream = ep.chatCompletionStream({
model: "mistral-tiny",
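
Taken together, the rename is meant to be drop-in; a rough before/after sketch (hypothetical token value):

```ts
import { InferenceClient, HfInference } from "@huggingface/inference";

// New name:
const client = new InferenceClient("hf_...");

// Old name keeps working as a deprecated alias, so existing code doesn't break:
const legacy = new HfInference("hf_...");

// Call shapes are unchanged by the rename:
const out = await client.chatCompletion({
  model: "meta-llama/Llama-3.1-8B-Instruct",
  messages: [{ role: "user", content: "Hello!" }],
  max_tokens: 64,
});
```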
8 changes: 4 additions & 4 deletions packages/inference/scripts/generate-dts.ts
@@ -61,12 +61,12 @@ for (const dir of dirs) {

appendFileSync(
"./dist/index.d.ts",
`export class HfInference {
`export class InferenceClient {
\tconstructor(accessToken?: string, defaultOptions?: Options);
\t/**
\t * Returns copy of HfInference tied to a specified endpoint.
\t * Returns copy of InferenceClient tied to a specified endpoint.
\t */
\tendpoint(endpointUrl: string): HfInferenceEndpoint;
\tendpoint(endpointUrl: string): InferenceClientEndpoint;
` +
fns
.map(
@@ -84,7 +84,7 @@ appendFileSync(

appendFileSync(
"./dist/index.d.ts",
`export class HfInferenceEndpoint {\n\tconstructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);\n` +
`export class InferenceClientEndpoint {\n\tconstructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);\n` +
fns
.map(
(fn) =>
packages/inference/src/{HfInference.ts → InferenceClient.ts}
@@ -21,7 +21,7 @@ type TaskWithNoAccessTokenNoEndpointUrl = {
) => ReturnType<Task[key]>;
};

export class HfInference {
export class InferenceClient {
private readonly accessToken: string;
private readonly defaultOptions: Options;

@@ -40,14 +40,14 @@ export class HfInference {
}

/**
* Returns copy of HfInference tied to a specified endpoint.
* Returns copy of InferenceClient tied to a specified endpoint.
*/
public endpoint(endpointUrl: string): HfInferenceEndpoint {
return new HfInferenceEndpoint(endpointUrl, this.accessToken, this.defaultOptions);
public endpoint(endpointUrl: string): InferenceClientEndpoint {
return new InferenceClientEndpoint(endpointUrl, this.accessToken, this.defaultOptions);
}
}

export class HfInferenceEndpoint {
export class InferenceClientEndpoint {
constructor(endpointUrl: string, accessToken = "", defaultOptions: Options = {}) {
accessToken;
defaultOptions;
@@ -63,6 +63,11 @@ export class HfInferenceEndpoint {
}
}

export interface HfInference extends TaskWithNoAccessToken {}
export interface InferenceClient extends TaskWithNoAccessToken {}

export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {}
export interface InferenceClientEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {}

/**
* For backward compatibility only.
*/
export class HfInference extends InferenceClient {}
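
A note on the interface/class pairs in the hunk above: they rely on TypeScript declaration merging, where an interface and a class with the same name merge, so the per-task methods declared on the interface become part of the class's instance type. A standalone sketch of the technique (illustrative names, not the library's real types):

```ts
interface Greeter {
  greet(name: string): string;
}

class Greeter {} // merges with the interface above

// Attach the implementation at runtime, roughly the way the client wires up task methods.
Greeter.prototype.greet = function (name: string) {
  return `Hello, ${name}!`;
};

const g = new Greeter();
console.log(g.greet("world")); // type-checks thanks to the merged declaration
```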
Member Author


backward compatibility line ^

Member Author


do you have a preference vs. export const HfInference = InferenceClient; maybe @coyotte508?

Otherwise I'll merge
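
For context, a rough sketch of the two options being weighed (either keeps `import { HfInference }` working):

```ts
import { InferenceClient } from "@huggingface/inference";

// Option taken in this PR: a subclass alias. Instances still satisfy
// `instanceof InferenceClient`, and the old name remains a distinct class.
export class HfInference extends InferenceClient {}

// Alternative raised above: a const alias to the very same constructor.
// Note it only covers value positions; type positions (e.g. `let x: HfInference`)
// would also need `export type HfInference = InferenceClient;`.
// export const HfInference = InferenceClient;
```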

Member


Maybe you could have slapped @deprecated on it

Member Author


oooh we have this?

Member


/**
 * @deprecated use YYY instead
 */
class XXX {
}

It will strike through in the editor every time X is called
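
Applied to this PR's backward-compatibility export, that suggestion would look roughly like:

```ts
// In InferenceClient.ts, where InferenceClient is defined:
/**
 * @deprecated Use InferenceClient instead.
 */
export class HfInference extends InferenceClient {}
```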

(screenshot: the deprecated class name rendered with strikethrough in the editor)

2 changes: 1 addition & 1 deletion packages/inference/src/index.ts
@@ -1,4 +1,4 @@
export { HfInference, HfInferenceEndpoint } from "./HfInference";
export { InferenceClient, InferenceClientEndpoint, HfInference } from "./InferenceClient";
export { InferenceOutputError } from "./lib/InferenceOutputError";
export * from "./types";
export * from "./tasks";