Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ for await (const output of hf.textGenerationStream({

### Text Generation (Chat Completion API Compatible)

Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://huggingface.co/docs/text-generation-inference/) on Hugging Face support the Messages API.

[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)

Expand Down
1 change: 1 addition & 0 deletions packages/inference/src/config.ts
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export const HF_HUB_URL = "https://huggingface.co";
export const HF_ROUTER_URL = "https://router.huggingface.co";
4 changes: 2 additions & 2 deletions packages/inference/src/lib/makeRequestOptions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { HF_HUB_URL } from "../config";
import { HF_HUB_URL, HF_ROUTER_URL } from "../config";
import { FAL_AI_API_BASE_URL } from "../providers/fal-ai";
import { NEBIUS_API_BASE_URL } from "../providers/nebius";
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
Expand All @@ -11,7 +11,7 @@ import { isUrl } from "./isUrl";
import { version as packageVersion, name as packageName } from "../../package.json";
import { getProviderModelId } from "./getProviderModelId";

const HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
const HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;

/**
* Lazy-loaded from huggingface.co/api/tasks when needed
Expand Down
13 changes: 7 additions & 6 deletions packages/inference/test/HfInference.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ describe.concurrent("HfInference", () => {
"HF Inference",
() => {
const hf = new HfInference(env.HF_TOKEN);

it("throws error if model does not exist", () => {
expect(
hf.fillMask({
model: "this-model-does-not-exist-123",
model: "this-model/does-not-exist-123",
inputs: "[MASK] world!",
})
).rejects.toThrowError("Not Found: Model not found");
).rejects.toThrowError("Model this-model/does-not-exist-123 does not exist");
});

it("fillMask", async () => {
Expand Down Expand Up @@ -648,7 +649,7 @@ describe.concurrent("HfInference", () => {
});

it("endpoint - makes request to specified endpoint", async () => {
const ep = hf.endpoint("https://api-inference.huggingface.co/models/openai-community/gpt2");
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/openai-community/gpt2");
const { generated_text } = await ep.textGeneration({
inputs: "one plus two equals",
});
Expand Down Expand Up @@ -686,7 +687,7 @@ describe.concurrent("HfInference", () => {
expect(out).toContain("2");
});

it("chatCompletionStream modelId Fail - OpenAI Specs", async () => {
it.skip("chatCompletionStream modelId Fail - OpenAI Specs", async () => {
expect(
hf
.chatCompletionStream({
Expand All @@ -703,7 +704,7 @@ describe.concurrent("HfInference", () => {
});

it("chatCompletion - OpenAI Specs", async () => {
const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2");
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2");
const res = await ep.chatCompletion({
model: "tgi",
messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }],
Expand All @@ -717,7 +718,7 @@ describe.concurrent("HfInference", () => {
}
});
it("chatCompletionStream - OpenAI Specs", async () => {
const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2");
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2");
const stream = ep.chatCompletionStream({
model: "tgi",
messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],
Expand Down
Loading