diff --git a/README.md b/README.md
index fc8eb44a35..f2e4a4750c 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ await uploadFile({
 
 await inference.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
+  provider: "sambanova", // or together, fal-ai, replicate, cohere …
   messages: [
     {
       role: "user",
@@ -39,11 +40,11 @@ await inference.chatCompletion({
   ],
   max_tokens: 512,
   temperature: 0.5,
-  provider: "sambanova", // or together, fal-ai, replicate, cohere …
 });
 
 await inference.textToImage({
   model: "black-forest-labs/FLUX.1-dev",
+  provider: "replicate",
   inputs: "a picture of a green bird",
 });
 
@@ -54,7 +55,7 @@ await inference.textToImage({
 
 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.
 
-- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and third-party Inference Providers to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -84,7 +85,7 @@ npm install @huggingface/agents
 Then import the libraries in your code:
 
 ```ts
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 import { HfAgent } from "@huggingface/agents";
 import { createRepo, commit, deleteRepo, listFiles } from "@huggingface/hub";
 import type { RepoId } from "@huggingface/hub";
@@ -96,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 
 ```html
 <script type="module">
-    import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.5.1/+esm';
+    import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.5.1/+esm';
     import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@1.0.2/+esm";
 </script>
 ```
@@ -105,12 +106,12 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 
 ```ts
 // esm.sh
-import { HfInference } from "https://esm.sh/@huggingface/inference"
+import { InferenceClient } from "https://esm.sh/@huggingface/inference"
 import { HfAgent } from "https://esm.sh/@huggingface/agents";
 
 import { createRepo, commit, deleteRepo, listFiles } from "https://esm.sh/@huggingface/hub"
 // or npm:
-import { HfInference } from "npm:@huggingface/inference"
+import { InferenceClient } from "npm:@huggingface/inference"
 import { HfAgent } from "npm:@huggingface/agents";
 
 import { createRepo, commit, deleteRepo, listFiles } from "npm:@huggingface/hub"
@@ -123,11 +124,11 @@ Get your HF access token in your [account settings](https://huggingface.co/setti
 ### @huggingface/inference examples
 
 ```ts
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
 const HF_TOKEN = "hf_...";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 
 // Chat completion API
 const out = await inference.chatCompletion({
@@ -179,7 +180,7 @@ await inference.imageToText({
 
 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
 const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
-const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
+const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });
 
 // Chat Completion
 const llamaEndpoint = inference.endpoint(
diff --git a/e2e/deno/index.ts b/e2e/deno/index.ts
index bd70f4e418..02d11fe249 100644
--- a/e2e/deno/index.ts
+++ b/e2e/deno/index.ts
@@ -1,4 +1,4 @@
-import { HfInference } from "npm:@huggingface/inference@*";
+import { InferenceClient } from "npm:@huggingface/inference@*";
 import { whoAmI, listFiles } from "npm:@huggingface/hub@*";
 
 const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
@@ -10,7 +10,7 @@ for await (const file of listFiles({ repo: "gpt2" })) {
 
 const token = Deno.env.get("HF_TOKEN");
 if (token) {
-	const hf = new HfInference(token);
+	const hf = new InferenceClient(token);
 
 	const tokenInfo = await whoAmI({ credentials: { accessToken: token } });
 	console.log(tokenInfo);
diff --git a/e2e/svelte/src/routes/+page.svelte b/e2e/svelte/src/routes/+page.svelte
index 1ca0727d28..5c36447d5e 100644
--- a/e2e/svelte/src/routes/+page.svelte
+++ b/e2e/svelte/src/routes/+page.svelte
@@ -1,8 +1,8 @@
 <script>
 	import { whoAmI, listFiles } from "@huggingface/hub";
-	import { HfInference } from "@huggingface/inference";
+	import { InferenceClient } from "@huggingface/inference";
 
-	const hf = new HfInference();
+	const hf = new InferenceClient();
 
 	const test = async () => {
 		const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
diff --git a/e2e/ts/src/index.ts b/e2e/ts/src/index.ts
index 2ee185179b..e08467be8a 100644
--- a/e2e/ts/src/index.ts
+++ b/e2e/ts/src/index.ts
@@ -1,9 +1,9 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 import { whoAmI } from "@huggingface/hub";
 
 const hfToken = process.env.token;
 
-const hf = new HfInference(hfToken);
+const hf = new InferenceClient(hfToken);
 
 (async () => {
 	const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
diff --git a/packages/agents/package.json b/packages/agents/package.json
index d9c57eace6..a461d46393 100644
--- a/packages/agents/package.json
+++ b/packages/agents/package.json
@@ -56,6 +56,6 @@
 		"@types/node": "^18.13.0"
 	},
 	"dependencies": {
-		"@huggingface/inference": "^2.6.1"
+		"@huggingface/inference": "workspace:^"
 	}
 }
diff --git a/packages/agents/pnpm-lock.yaml b/packages/agents/pnpm-lock.yaml
index 455c7460ab..d8ea1de594 100644
--- a/packages/agents/pnpm-lock.yaml
+++ b/packages/agents/pnpm-lock.yaml
@@ -6,8 +6,8 @@ settings:
 
 dependencies:
   '@huggingface/inference':
-    specifier: ^2.6.1
-    version: 2.8.1
+    specifier: workspace:^
+    version: link:../inference
 
 devDependencies:
   '@types/node':
@@ -16,17 +16,6 @@ devDependencies:
 
 packages:
 
-  /@huggingface/inference@2.8.1:
-    resolution: {integrity: sha512-EfsNtY9OR6JCNaUa5bZu2mrs48iqeTz0Gutwf+fU0Kypx33xFQB4DKMhp8u4Ee6qVbLbNWvTHuWwlppLQl4p4Q==}
-    engines: {node: '>=18'}
-    dependencies:
-      '@huggingface/tasks': 0.12.30
-    dev: false
-
-  /@huggingface/tasks@0.12.30:
-    resolution: {integrity: sha512-A1ITdxbEzx9L8wKR8pF7swyrTLxWNDFIGDLUWInxvks2ruQ8PLRBZe8r0EcjC3CDdtlj9jV1V4cgV35K/iy3GQ==}
-    dev: false
-
   /@types/node@18.13.0:
     resolution: {integrity: sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg==}
     dev: true
diff --git a/packages/agents/src/lib/evalBuilder.ts b/packages/agents/src/lib/evalBuilder.ts
index 4853532612..ed5de329e0 100644
--- a/packages/agents/src/lib/evalBuilder.ts
+++ b/packages/agents/src/lib/evalBuilder.ts
@@ -1,4 +1,4 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 import type { Data, Tool } from "../types";
 
 // this function passes the tools & files to the context before calling eval
@@ -17,7 +17,7 @@ export async function evalBuilder(
 
 		// add tools to context
 		for (const tool of tools) {
-			const toolCall = (input: Promise<Data>) => tool.call?.(input, new HfInference(accessToken ?? ""));
+			const toolCall = (input: Promise<Data>) => tool.call?.(input, new InferenceClient(accessToken ?? ""));
 			// @ts-expect-error adding to the scope
 			globalThis[tool.name] = toolCall;
 		}
diff --git a/packages/agents/src/llms/LLMHF.ts b/packages/agents/src/llms/LLMHF.ts
index 855a1b5387..1e5c345311 100644
--- a/packages/agents/src/llms/LLMHF.ts
+++ b/packages/agents/src/llms/LLMHF.ts
@@ -1,8 +1,8 @@
 import type { LLM } from "../types";
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
 export function LLMFromHub(accessToken?: string, model?: string): LLM {
-	const inference = new HfInference(accessToken);
+	const inference = new InferenceClient(accessToken);
 
 	return async (prompt: string): Promise<string> => {
 		const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";
@@ -20,7 +20,7 @@ export function LLMFromHub(accessToken?: string, model?: string): LLM {
 }
 
 export function LLMFromEndpoint(accessToken: string, endpoint: string): LLM {
-	const inference = new HfInference(accessToken).endpoint(endpoint);
+	const inference = new InferenceClient(accessToken).endpoint(endpoint);
 	return async (prompt: string): Promise<string> => {
 		const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";
 
diff --git a/packages/agents/src/tools/imageToText.ts b/packages/agents/src/tools/imageToText.ts
index 0ba0852e2b..fc44fe4d00 100644
--- a/packages/agents/src/tools/imageToText.ts
+++ b/packages/agents/src/tools/imageToText.ts
@@ -15,9 +15,12 @@ export const imageToTextTool: Tool = {
 		if (typeof data === "string") throw "Input must be a blob.";
 
 		return (
-			await inference.imageToText({
-				data,
-			})
-		).generated_text;
+			// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+			(
+				await inference.imageToText({
+					data,
+				})
+			).generated_text!
+		);
 	},
 };
diff --git a/packages/agents/src/types.d.ts b/packages/agents/src/types.d.ts
index 122d33e309..f0955e48c9 100644
--- a/packages/agents/src/types.d.ts
+++ b/packages/agents/src/types.d.ts
@@ -1,4 +1,4 @@
-import type { HfInference } from "@huggingface/inference";
+import type { InferenceClient } from "@huggingface/inference";
 
 export type Data = string | Blob | ArrayBuffer;
 
@@ -6,7 +6,7 @@ export interface Tool {
 	name: string;
 	description: string;
 	examples: Array<Example>;
-	call?: (input: Promise<Data>, inference: HfInference) => Promise<Data>;
+	call?: (input: Promise<Data>, inference: InferenceClient) => Promise<Data>;
 }
 
 export interface Example {
diff --git a/packages/agents/test/HfAgent.spec.ts b/packages/agents/test/HfAgent.spec.ts
index 5b99e0a1f0..9c46053971 100644
--- a/packages/agents/test/HfAgent.spec.ts
+++ b/packages/agents/test/HfAgent.spec.ts
@@ -1,7 +1,7 @@
 import { describe, expect, it } from "vitest";
 import { HfAgent, defaultTools, LLMFromHub, LLMFromEndpoint } from "../src";
 import type { Data } from "../src/types";
-import type { HfInference } from "@huggingface/inference";
+import type { InferenceClient } from "@huggingface/inference";
 
 const env = import.meta.env;
 if (!env.HF_TOKEN) {
@@ -33,7 +33,7 @@ describe("HfAgent", () => {
 				},
 			],
 			// eslint-disable-next-line @typescript-eslint/no-unused-vars
-			call: async (input: Promise<Data>, inference: HfInference): Promise<Data> => {
+			call: async (input: Promise<Data>, inference: InferenceClient): Promise<Data> => {
 				const data = await input;
 				if (typeof data !== "string") {
 					throw new Error("Input must be a string");
diff --git a/packages/gguf/src/cli.ts b/packages/gguf/src/cli.ts
index 9662d6db88..eac386d849 100644
--- a/packages/gguf/src/cli.ts
+++ b/packages/gguf/src/cli.ts
@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 
-import { GGMLQuantizationType, gguf, ggufAllShards, GGUFParseOutput } from ".";
+import type { GGUFParseOutput } from ".";
+import { GGMLQuantizationType, ggufAllShards } from ".";
 import { GGML_QUANT_SIZES } from "./quant-descriptions";
 
 interface PrintColumnHeader {
diff --git a/packages/inference/README.md b/packages/inference/README.md
index 3907b43340..c4a2ea4b1d 100644
--- a/packages/inference/README.md
+++ b/packages/inference/README.md
@@ -1,7 +1,7 @@
 # 🤗 Hugging Face Inference
 
-A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers.
-It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers.
+A TypeScript-powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and all supported Inference Providers.
+It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with all supported third-party Inference Providers.
 
 Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).
 
@@ -25,24 +25,24 @@ yarn add @huggingface/inference
 
 ```ts
 // esm.sh
-import { HfInference } from "https://esm.sh/@huggingface/inference"
+import { InferenceClient } from "https://esm.sh/@huggingface/inference"
 // or npm:
-import { HfInference } from "npm:@huggingface/inference"
+import { InferenceClient } from "npm:@huggingface/inference"
 ```
 
 ### Initialize
 
 ```typescript
-import { HfInference } from '@huggingface/inference'
+import { InferenceClient } from '@huggingface/inference'
 
-const hf = new HfInference('your access token')
+const hf = new InferenceClient('your access token')
 ```
 
 ❗**Important note:** Using an access token is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**.
 
 Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
 
-### Third-party inference providers
+### Supported inference providers
 
 You can send inference requests to third-party providers with the inference client.
 
@@ -63,7 +63,7 @@ To send requests to a third-party provider, you have to pass the `provider` para
 ```ts
 const accessToken = "hf_..."; // Either a HF access token, or an API key from the third-party provider (Replicate in this example)
 
-const client = new HfInference(accessToken);
+const client = new InferenceClient(accessToken);
 await client.textToImage({
   provider: "replicate",
   model:"black-forest-labs/Flux.1-dev",
@@ -93,7 +93,7 @@ This is not an issue for LLMs as everyone converged on the OpenAI API anyways, b
 
 ### Tree-shaking
 
-You can import the functions you need directly from the module instead of using the `HfInference` class.
+You can import the functions you need directly from the module instead of using the `InferenceClient` class.
 
 ```ts
 import { textGeneration } from "@huggingface/inference";
@@ -165,7 +165,7 @@ for await (const chunk of hf.chatCompletionStream({
 It's also possible to call Mistral or OpenAI endpoints directly:
 
 ```typescript
-const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
+const openai = new InferenceClient(OPENAI_TOKEN).endpoint("https://api.openai.com");
 
 let out = "";
 for await (const chunk of openai.chatCompletionStream({
@@ -602,7 +602,7 @@ You can use any Chat Completion API-compatible provider with the `chatCompletion
 ```typescript
 // Chat Completion Example
 const MISTRAL_KEY = process.env.MISTRAL_KEY;
-const hf = new HfInference(MISTRAL_KEY);
+const hf = new InferenceClient(MISTRAL_KEY);
 const ep = hf.endpoint("https://api.mistral.ai");
 const stream = ep.chatCompletionStream({
   model: "mistral-tiny",
diff --git a/packages/inference/package.json b/packages/inference/package.json
index 31fafb2031..466a8093ef 100644
--- a/packages/inference/package.json
+++ b/packages/inference/package.json
@@ -3,8 +3,8 @@
 	"version": "3.5.1",
 	"packageManager": "pnpm@8.10.5",
 	"license": "MIT",
-	"author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
-	"description": "Typescript wrapper for the Hugging Face Inference Endpoints & Inference API",
+	"author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
+	"description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
 	"repository": {
 		"type": "git",
 		"url": "https://github.com/huggingface/huggingface.js.git"
diff --git a/packages/inference/scripts/generate-dts.ts b/packages/inference/scripts/generate-dts.ts
index 35427d5236..d483fc8946 100644
--- a/packages/inference/scripts/generate-dts.ts
+++ b/packages/inference/scripts/generate-dts.ts
@@ -61,12 +61,12 @@ for (const dir of dirs) {
 
 appendFileSync(
 	"./dist/index.d.ts",
-	`export class HfInference {
+	`export class InferenceClient {
 \tconstructor(accessToken?: string, defaultOptions?: Options);
 \t/**
-\t * Returns copy of HfInference tied to a specified endpoint.
+\t * Returns copy of InferenceClient tied to a specified endpoint.
 \t */
-\tendpoint(endpointUrl: string): HfInferenceEndpoint;
+\tendpoint(endpointUrl: string): InferenceClientEndpoint;
 ` +
 		fns
 			.map(
@@ -84,7 +84,7 @@ appendFileSync(
 
 appendFileSync(
 	"./dist/index.d.ts",
-	`export class HfInferenceEndpoint {\n\tconstructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);\n` +
+	`export class InferenceClientEndpoint {\n\tconstructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);\n` +
 		fns
 			.map(
 				(fn) =>
diff --git a/packages/inference/src/HfInference.ts b/packages/inference/src/InferenceClient.ts
similarity index 76%
rename from packages/inference/src/HfInference.ts
rename to packages/inference/src/InferenceClient.ts
index 6cc268cf29..0c83295450 100644
--- a/packages/inference/src/HfInference.ts
+++ b/packages/inference/src/InferenceClient.ts
@@ -21,7 +21,7 @@ type TaskWithNoAccessTokenNoEndpointUrl = {
 	) => ReturnType<Task[key]>;
 };
 
-export class HfInference {
+export class InferenceClient {
 	private readonly accessToken: string;
 	private readonly defaultOptions: Options;
 
@@ -40,14 +40,14 @@ export class HfInference {
 	}
 
 	/**
-	 * Returns copy of HfInference tied to a specified endpoint.
+	 * Returns copy of InferenceClient tied to a specified endpoint.
 	 */
-	public endpoint(endpointUrl: string): HfInferenceEndpoint {
-		return new HfInferenceEndpoint(endpointUrl, this.accessToken, this.defaultOptions);
+	public endpoint(endpointUrl: string): InferenceClientEndpoint {
+		return new InferenceClientEndpoint(endpointUrl, this.accessToken, this.defaultOptions);
 	}
 }
 
-export class HfInferenceEndpoint {
+export class InferenceClientEndpoint {
 	constructor(endpointUrl: string, accessToken = "", defaultOptions: Options = {}) {
 		accessToken;
 		defaultOptions;
@@ -63,6 +63,11 @@ export class HfInferenceEndpoint {
 	}
 }
 
-export interface HfInference extends TaskWithNoAccessToken {}
+export interface InferenceClient extends TaskWithNoAccessToken {}
 
-export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {}
+export interface InferenceClientEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {}
+
+/**
+ * Empty subclass of InferenceClient exposing the former class name; kept for backward compatibility only.
+ */
+export class HfInference extends InferenceClient {}
diff --git a/packages/inference/src/index.ts b/packages/inference/src/index.ts
index 3cd1d8fcab..b21526a5ea 100644
--- a/packages/inference/src/index.ts
+++ b/packages/inference/src/index.ts
@@ -1,4 +1,4 @@
-export { HfInference, HfInferenceEndpoint } from "./HfInference";
+export { InferenceClient, InferenceClientEndpoint, HfInference } from "./InferenceClient";
 export { InferenceOutputError } from "./lib/InferenceOutputError";
 export * from "./types";
 export * from "./tasks";
diff --git a/packages/inference/src/snippets/js.ts b/packages/inference/src/snippets/js.ts
index c8d38b00be..f9e8edb099 100644
--- a/packages/inference/src/snippets/js.ts
+++ b/packages/inference/src/snippets/js.ts
@@ -34,9 +34,9 @@ export const snippetBasic = (
 					{
 						client: "huggingface.js",
 						content: `\
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+const client = new InferenceClient("${accessToken || `{API_TOKEN}`}");
 
 const output = await client.${HFJS_METHODS[model.pipeline_tag]}({
 	model: "${model.id}",
@@ -109,9 +109,9 @@ export const snippetTextGeneration = (
 			return [
 				{
 					client: "huggingface.js",
-					content: `import { HfInference } from "@huggingface/inference";
+					content: `import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+const client = new InferenceClient("${accessToken || `{API_TOKEN}`}");
 
 let out = "";
 
@@ -161,9 +161,9 @@ for await (const chunk of stream) {
 			return [
 				{
 					client: "huggingface.js",
-					content: `import { HfInference } from "@huggingface/inference";
+					content: `import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+const client = new InferenceClient("${accessToken || `{API_TOKEN}`}");
 
 const chatCompletion = await client.chatCompletion({
 	model: "${model.id}",
@@ -238,9 +238,9 @@ export const snippetTextToImage = (
 		{
 			client: "huggingface.js",
 			content: `\
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+const client = new InferenceClient("${accessToken || `{API_TOKEN}`}");
 
 const image = await client.textToImage({
 	model: "${model.id}",
@@ -289,9 +289,9 @@ export const snippetTextToVideo = (
 				{
 					client: "huggingface.js",
 					content: `\
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+const client = new InferenceClient("${accessToken || `{API_TOKEN}`}");
 
 const video = await client.textToVideo({
 	model: "${model.id}",
@@ -369,9 +369,9 @@ export const snippetAutomaticSpeechRecognition = (
 		{
 			client: "huggingface.js",
 			content: `\
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+const client = new InferenceClient("${accessToken || `{API_TOKEN}`}");
 
 const data = fs.readFileSync(${getModelInputSnippet(model)});
 
diff --git a/packages/inference/test/HfInference.spec.ts b/packages/inference/test/InferenceClient.spec.ts
similarity index 97%
rename from packages/inference/test/HfInference.spec.ts
rename to packages/inference/test/InferenceClient.spec.ts
index c6c5350408..0d16f9d1d8 100644
--- a/packages/inference/test/HfInference.spec.ts
+++ b/packages/inference/test/InferenceClient.spec.ts
@@ -3,7 +3,14 @@ import { assert, describe, expect, it } from "vitest";
 import type { ChatCompletionStreamOutput } from "@huggingface/tasks";
 
 import type { TextToImageArgs } from "../src";
-import { chatCompletion, chatCompletionStream, HfInference, textGeneration, textToImage } from "../src";
+import {
+	chatCompletion,
+	chatCompletionStream,
+	InferenceClient,
+	textGeneration,
+	textToImage,
+	HfInference,
+} from "../src";
 import { textToVideo } from "../src/tasks/cv/textToVideo";
 import { readTestFile } from "./test-files";
 import "./vcr";
@@ -17,13 +24,20 @@ if (!env.HF_TOKEN) {
 	console.warn("Set HF_TOKEN in the env to run the tests for better rate limits");
 }
 
-describe.concurrent("HfInference", () => {
+describe.concurrent("InferenceClient", () => {
 	// Individual tests can be ran without providing an api key, however running all tests without an api key will result in rate limiting error.
 
+	describe("backward compatibility", () => {
+		it("works with old HfInference name", async () => {
+			const hf = new HfInference(env.HF_TOKEN);
+			expect("fillMask" in hf).toBe(true);
+		});
+	});
+
 	describe.concurrent(
 		"HF Inference",
 		() => {
-			const hf = new HfInference(env.HF_TOKEN);
+			const hf = new InferenceClient(env.HF_TOKEN);
 
 			it("throws error if model does not exist", () => {
 				expect(
@@ -738,7 +752,7 @@ describe.concurrent("HfInference", () => {
 			});
 			it("custom mistral - OpenAI Specs", async () => {
 				const MISTRAL_KEY = env.MISTRAL_KEY;
-				const hf = new HfInference(MISTRAL_KEY);
+				const hf = new InferenceClient(MISTRAL_KEY);
 				const ep = hf.endpoint("https://api.mistral.ai");
 				const stream = ep.chatCompletionStream({
 					model: "mistral-tiny",
@@ -754,7 +768,7 @@ describe.concurrent("HfInference", () => {
 			});
 			it("custom openai - OpenAI Specs", async () => {
 				const OPENAI_KEY = env.OPENAI_KEY;
-				const hf = new HfInference(OPENAI_KEY);
+				const hf = new InferenceClient(OPENAI_KEY);
 				const stream = hf.chatCompletionStream({
 					provider: "openai",
 					model: "openai/gpt-3.5-turbo",
@@ -770,7 +784,7 @@ describe.concurrent("HfInference", () => {
 			});
 			it("OpenAI client side routing - model should have provider as prefix", async () => {
 				await expect(
-					new HfInference("dummy_token").chatCompletion({
+					new InferenceClient("dummy_token").chatCompletion({
 						model: "gpt-3.5-turbo", // must be "openai/gpt-3.5-turbo"
 						provider: "openai",
 						messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
@@ -787,7 +801,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Fal AI",
 		() => {
-			const client = new HfInference(env.HF_FAL_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_FAL_KEY ?? "dummy");
 
 			it(`textToImage - black-forest-labs/FLUX.1-schnell`, async () => {
 				const res = await client.textToImage({
@@ -873,7 +887,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Replicate",
 		() => {
-			const client = new HfInference(env.HF_REPLICATE_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_REPLICATE_KEY ?? "dummy");
 
 			it("textToImage canonical - black-forest-labs/FLUX.1-schnell", async () => {
 				const res = await client.textToImage({
@@ -989,7 +1003,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"SambaNova",
 		() => {
-			const client = new HfInference(env.HF_SAMBANOVA_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_SAMBANOVA_KEY ?? "dummy");
 
 			it("chatCompletion", async () => {
 				const res = await client.chatCompletion({
@@ -1023,7 +1037,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Together",
 		() => {
-			const client = new HfInference(env.HF_TOGETHER_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_TOGETHER_KEY ?? "dummy");
 
 			it("chatCompletion", async () => {
 				const res = await client.chatCompletion({
@@ -1078,7 +1092,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Nebius",
 		() => {
-			const client = new HfInference(env.HF_NEBIUS_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_NEBIUS_KEY ?? "dummy");
 
 			HARDCODED_MODEL_ID_MAPPING.nebius = {
 				"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -1143,7 +1157,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Fireworks",
 		() => {
-			const client = new HfInference(env.HF_FIREWORKS_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_FIREWORKS_KEY ?? "dummy");
 
 			HARDCODED_MODEL_ID_MAPPING["fireworks-ai"] = {
 				"deepseek-ai/DeepSeek-R1": "accounts/fireworks/models/deepseek-r1",
@@ -1269,7 +1283,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Novita",
 		() => {
-			const client = new HfInference(env.HF_NOVITA_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_NOVITA_KEY ?? "dummy");
 
 			HARDCODED_MODEL_ID_MAPPING["novita"] = {
 				"meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
@@ -1362,7 +1376,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Cohere",
 		() => {
-			const client = new HfInference(env.HF_COHERE_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_COHERE_KEY ?? "dummy");
 
 			HARDCODED_MODEL_ID_MAPPING["cohere"] = {
 				"CohereForAI/c4ai-command-r7b-12-2024": "command-r7b-12-2024",
@@ -1409,7 +1423,7 @@ describe.concurrent("HfInference", () => {
 	describe.concurrent(
 		"Cerebras",
 		() => {
-			const client = new HfInference(env.HF_CEREBRAS_KEY ?? "dummy");
+			const client = new InferenceClient(env.HF_CEREBRAS_KEY ?? "dummy");
 
 			HARDCODED_MODEL_ID_MAPPING["cerebras"] = {
 				"meta-llama/llama-3.1-8b-instruct": "llama3.1-8b",
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.hf-inference.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.hf-inference.js
index 15361c3e45..bb9e3426ad 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.hf-inference.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.hf-inference.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const chatCompletion = await client.chatCompletion({
 	model: "meta-llama/Llama-3.1-8B-Instruct",
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.together.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.together.js
index 9d64919ed9..d813490d58 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.together.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.together.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const chatCompletion = await client.chatCompletion({
 	model: "meta-llama/Llama-3.1-8B-Instruct",
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.hf-inference.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.hf-inference.js
index 4bdd0c143e..489af2f920 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.hf-inference.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.hf-inference.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 let out = "";
 
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.together.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.together.js
index 5822b2ce57..27aad589fb 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.together.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.together.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 let out = "";
 
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.fireworks-ai.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.fireworks-ai.js
index 88e8d44506..bd9d3b53ef 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.fireworks-ai.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.fireworks-ai.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const chatCompletion = await client.chatCompletion({
 	model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.hf-inference.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.hf-inference.js
index 2a69d4d82c..04c76c0349 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.hf-inference.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.hf-inference.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const chatCompletion = await client.chatCompletion({
 	model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.fireworks-ai.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.fireworks-ai.js
index 2407499efe..e85330ba44 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.fireworks-ai.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.fireworks-ai.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 let out = "";
 
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.hf-inference.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.hf-inference.js
index 45886e7709..063d9b9ade 100644
--- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.hf-inference.js
+++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/0.huggingface.js.hf-inference.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 let out = "";
 
diff --git a/packages/tasks-gen/snippets-fixtures/text-classification/0.huggingface.js.hf-inference.js b/packages/tasks-gen/snippets-fixtures/text-classification/0.huggingface.js.hf-inference.js
index 5d7bfb7307..be0301a797 100644
--- a/packages/tasks-gen/snippets-fixtures/text-classification/0.huggingface.js.hf-inference.js
+++ b/packages/tasks-gen/snippets-fixtures/text-classification/0.huggingface.js.hf-inference.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const output = await client.textClassification({
 	model: "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.fal-ai.js
index 1eff0bedbe..979f35bb83 100644
--- a/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.fal-ai.js
+++ b/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.fal-ai.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const image = await client.textToImage({
 	model: "black-forest-labs/FLUX.1-schnell",
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.hf-inference.js b/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.hf-inference.js
index f506de747a..89f1d61edd 100644
--- a/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.hf-inference.js
+++ b/packages/tasks-gen/snippets-fixtures/text-to-image/0.huggingface.js.hf-inference.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const image = await client.textToImage({
 	model: "black-forest-labs/FLUX.1-schnell",
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js
index 1176dc2716..6dfceaefca 100644
--- a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js
+++ b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const video = await client.textToVideo({
 	model: "tencent/HunyuanVideo",
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js
index fa0c617b3b..92f7572283 100644
--- a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js
+++ b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js
@@ -1,6 +1,6 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const client = new HfInference("api_token");
+const client = new InferenceClient("api_token");
 
 const video = await client.textToVideo({
 	model: "tencent/HunyuanVideo",
diff --git a/packages/tasks/src/inference-providers.ts b/packages/tasks/src/inference-providers.ts
index 41f5b8d729..0a5ae18099 100644
--- a/packages/tasks/src/inference-providers.ts
+++ b/packages/tasks/src/inference-providers.ts
@@ -19,7 +19,7 @@ export const HF_HUB_INFERENCE_PROXY_TEMPLATE = `https://router.huggingface.co/{{
 /**
  * URL to set as baseUrl in the OpenAI SDK.
  *
- * TODO(Expose this from HfInference in the future?)
+ * TODO(Expose this from InferenceClient in the future?)
  */
 export function openAIbaseUrl(provider: SnippetInferenceProvider): string {
 	const url = HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", provider);
diff --git a/packages/tasks/src/tasks/audio-classification/about.md b/packages/tasks/src/tasks/audio-classification/about.md
index c5c7c8d298..dfeab08f05 100644
--- a/packages/tasks/src/tasks/audio-classification/about.md
+++ b/packages/tasks/src/tasks/audio-classification/about.md
@@ -51,9 +51,9 @@ data = query("sample1.flac")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer with audio classification models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.audioClassification({
 	data: await (await fetch("sample.flac")).blob(),
 	model: "facebook/mms-lid-126",
diff --git a/packages/tasks/src/tasks/audio-to-audio/about.md b/packages/tasks/src/tasks/audio-to-audio/about.md
index a05c01da2e..eeda8c16af 100644
--- a/packages/tasks/src/tasks/audio-to-audio/about.md
+++ b/packages/tasks/src/tasks/audio-to-audio/about.md
@@ -33,9 +33,9 @@ data = query("sample1.flac")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer with audio-to-audio models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.audioToAudio({
 	data: await (await fetch("sample.flac")).blob(),
 	model: "speechbrain/sepformer-wham",
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/about.md b/packages/tasks/src/tasks/automatic-speech-recognition/about.md
index 9cdc187da3..e1cab59719 100644
--- a/packages/tasks/src/tasks/automatic-speech-recognition/about.md
+++ b/packages/tasks/src/tasks/automatic-speech-recognition/about.md
@@ -39,9 +39,9 @@ data = query("sample1.flac")
 You can also use [huggingface.js](https://github.com/huggingface/huggingface.js), the JavaScript client, to transcribe audio with the Serverless Inference API.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.automaticSpeechRecognition({
 	data: await (await fetch("sample.flac")).blob(),
 	model: "openai/whisper-large-v3",
diff --git a/packages/tasks/src/tasks/image-classification/about.md b/packages/tasks/src/tasks/image-classification/about.md
index 04169331f2..6d2b445fa6 100644
--- a/packages/tasks/src/tasks/image-classification/about.md
+++ b/packages/tasks/src/tasks/image-classification/about.md
@@ -27,9 +27,9 @@ clf("path_to_a_cat_image")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to classify images using models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.imageClassification({
 	data: await (await fetch("https://picsum.photos/300/300")).blob(),
 	model: "microsoft/resnet-50",
diff --git a/packages/tasks/src/tasks/image-segmentation/about.md b/packages/tasks/src/tasks/image-segmentation/about.md
index ce9be3aecf..18af58ad86 100644
--- a/packages/tasks/src/tasks/image-segmentation/about.md
+++ b/packages/tasks/src/tasks/image-segmentation/about.md
@@ -43,9 +43,9 @@ model("cat.png")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image segmentation models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.imageSegmentation({
 	data: await (await fetch("https://picsum.photos/300/300")).blob(),
 	model: "facebook/mask2former-swin-base-coco-panoptic",
diff --git a/packages/tasks/src/tasks/image-to-image/about.md b/packages/tasks/src/tasks/image-to-image/about.md
index 3750b34e5f..d3c2acd297 100644
--- a/packages/tasks/src/tasks/image-to-image/about.md
+++ b/packages/tasks/src/tasks/image-to-image/about.md
@@ -45,9 +45,9 @@ make_image_grid([init_image, image], rows=1, cols=2)
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-image models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.imageToImage({
 	data: await (await fetch("image")).blob(),
 	model: "timbrooks/instruct-pix2pix",
diff --git a/packages/tasks/src/tasks/image-to-text/about.md b/packages/tasks/src/tasks/image-to-text/about.md
index e479783b3f..403dbdc41d 100644
--- a/packages/tasks/src/tasks/image-to-text/about.md
+++ b/packages/tasks/src/tasks/image-to-text/about.md
@@ -42,9 +42,9 @@ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-text models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.imageToText({
 	data: await (await fetch("https://picsum.photos/300/300")).blob(),
 	model: "Salesforce/blip-image-captioning-base",
diff --git a/packages/tasks/src/tasks/summarization/about.md b/packages/tasks/src/tasks/summarization/about.md
index 79c585217d..44c8d2a4a0 100644
--- a/packages/tasks/src/tasks/summarization/about.md
+++ b/packages/tasks/src/tasks/summarization/about.md
@@ -23,9 +23,9 @@ classifier("Paris is the capital and most populous city of France, with an estim
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer summarization models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 const inputs =
 	"Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017.";
 
diff --git a/packages/tasks/src/tasks/text-classification/about.md b/packages/tasks/src/tasks/text-classification/about.md
index faba693d03..dc0bfdf470 100644
--- a/packages/tasks/src/tasks/text-classification/about.md
+++ b/packages/tasks/src/tasks/text-classification/about.md
@@ -115,9 +115,9 @@ classifier("Which city is the capital of France?, Where is the capital of France
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer text classification models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.conversational({
 	model: "distilbert-base-uncased-finetuned-sst-2-english",
 	inputs: "I love this movie!",
diff --git a/packages/tasks/src/tasks/text-generation/about.md b/packages/tasks/src/tasks/text-generation/about.md
index f0e02153c5..734d83d4b7 100644
--- a/packages/tasks/src/tasks/text-generation/about.md
+++ b/packages/tasks/src/tasks/text-generation/about.md
@@ -74,9 +74,9 @@ text2text_generator("translate from English to French: I'm very happy")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer text classification models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.conversational({
 	model: "distilbert-base-uncased-finetuned-sst-2-english",
 	inputs: "I love this movie!",
diff --git a/packages/tasks/src/tasks/text-to-image/about.md b/packages/tasks/src/tasks/text-to-image/about.md
index e64a86615e..9cb11798ec 100644
--- a/packages/tasks/src/tasks/text-to-image/about.md
+++ b/packages/tasks/src/tasks/text-to-image/about.md
@@ -57,9 +57,9 @@ image = pipe(prompt).images[0]
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer text-to-image models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.textToImage({
 	model: "stabilityai/stable-diffusion-2",
 	inputs: "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]",
diff --git a/packages/tasks/src/tasks/text-to-speech/about.md b/packages/tasks/src/tasks/text-to-speech/about.md
index 56d795680d..c241d63df6 100644
--- a/packages/tasks/src/tasks/text-to-speech/about.md
+++ b/packages/tasks/src/tasks/text-to-speech/about.md
@@ -45,9 +45,9 @@ synthesizer("Look I am generating speech in three lines of code!")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer summarization models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.textToSpeech({
 	model: "facebook/mms-tts",
 	inputs: "text to generate speech from",
diff --git a/packages/tasks/src/tasks/translation/about.md b/packages/tasks/src/tasks/translation/about.md
index 23fc48576f..f4806687a5 100644
--- a/packages/tasks/src/tasks/translation/about.md
+++ b/packages/tasks/src/tasks/translation/about.md
@@ -35,9 +35,9 @@ translator("How are you?")
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer translation models on Hugging Face Hub.
 
 ```javascript
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 
-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);
 await inference.translation({
 	model: "t5-base",
 	inputs: "My name is Wolfgang and I live in Berlin",