Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_NOVITA_KEY: dummy
HF_FIREWORKS_KEY: dummy

browser:
Expand Down Expand Up @@ -88,6 +89,7 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_NOVITA_KEY: dummy
HF_FIREWORKS_KEY: dummy

e2e:
Expand Down Expand Up @@ -157,4 +159,5 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_NOVITA_KEY: dummy
HF_FIREWORKS_KEY: dummy
1 change: 1 addition & 0 deletions packages/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Currently, we support the following providers:
- [Fal.ai](https://fal.ai)
- [Fireworks AI](https://fireworks.ai)
- [Nebius](https://studio.nebius.ai)
- [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)
- [Replicate](https://replicate.com)
- [Sambanova](https://sambanova.ai)
- [Together](https://together.xyz)
Expand Down
14 changes: 14 additions & 0 deletions packages/inference/src/lib/makeRequestOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { NEBIUS_API_BASE_URL } from "../providers/nebius";
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
import { TOGETHER_API_BASE_URL } from "../providers/together";
import { NOVITA_API_BASE_URL } from "../providers/novita";
import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
import type { InferenceProvider } from "../types";
import type { InferenceTask, Options, RequestArgs } from "../types";
Expand Down Expand Up @@ -213,6 +214,7 @@ function makeUrl(params: {
}
return baseUrl;
}

case "fireworks-ai": {
const baseUrl = shouldProxy
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
Expand All @@ -222,6 +224,18 @@ function makeUrl(params: {
}
return baseUrl;
}
case "novita": {
const baseUrl = shouldProxy
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
: NOVITA_API_BASE_URL;
if (params.taskHint === "text-generation") {
if (params.chatCompletion) {
return `${baseUrl}/chat/completions`;
}
return `${baseUrl}/completions`;
}
return baseUrl;
}
default: {
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
const url = params.forceTask
Expand Down
1 change: 1 addition & 0 deletions packages/inference/src/providers/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
replicate: {},
sambanova: {},
together: {},
novita: {},
};
18 changes: 18 additions & 0 deletions packages/inference/src/providers/novita.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
 * Base URL of Novita's OpenAI-compatible inference API.
 *
 * The registered mapping of HF model ID => Novita model ID lives here:
 *
 * https://huggingface.co/api/partners/novita/models
 *
 * This is a publicly available mapping.
 *
 * If you want to try to run inference for a new model locally before it's registered on huggingface.co,
 * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
 *
 * - If you work at Novita and want to update this mapping, please use the model mapping API we provide on huggingface.co
 * - If you're a community member and want to add a new supported HF model to Novita, please open an issue on the present repo
 *   and we will tag Novita team members.
 *
 * Thanks!
 */
export const NOVITA_API_BASE_URL = "https://api.novita.ai/v3/openai";
2 changes: 2 additions & 0 deletions packages/inference/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ export const INFERENCE_PROVIDERS = [
"replicate",
"sambanova",
"together",
"novita",
] as const;

export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];

export interface BaseArgs {
Expand Down
48 changes: 48 additions & 0 deletions packages/inference/test/HfInference.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1175,4 +1175,52 @@ describe.concurrent("HfInference", () => {
},
TIMEOUT
);

describe.concurrent(
"Novita",
() => {
const client = new HfInference(env.HF_NOVITA_KEY);

HARDCODED_MODEL_ID_MAPPING["novita"] = {
"meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
"deepseek/deepseek-r1-distill-qwen-14b": "deepseek/deepseek-r1-distill-qwen-14b",
};

it("chatCompletion", async () => {
const res = await client.chatCompletion({
model: "meta-llama/llama-3.1-8b-instruct",
provider: "novita",
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
});
if (res.choices && res.choices.length > 0) {
const completion = res.choices[0].message?.content;
expect(completion).toContain("two");
}
});

it("chatCompletion stream", async () => {
const stream = client.chatCompletionStream({
model: "deepseek/deepseek-r1-distill-qwen-14b",
provider: "novita",
messages: [{ role: "user", content: "Say this is a test" }],
stream: true,
}) as AsyncGenerator<ChatCompletionStreamOutput>;

let fullResponse = "";
for await (const chunk of stream) {
if (chunk.choices && chunk.choices.length > 0) {
const content = chunk.choices[0].delta?.content;
if (content) {
fullResponse += content;
}
}
}

// Verify we got a meaningful response
expect(fullResponse).toBeTruthy();
expect(fullResponse.length).toBeGreaterThan(0);
});
},
TIMEOUT
);
});
61 changes: 61 additions & 0 deletions packages/inference/test/tapes.json

Large diffs are not rendered by default.