Skip to content

Commit d82fc7b

Browse files
committed
feat: Add Novita AI as an LLM API provider
1 parent f69c26c commit d82fc7b

File tree

6 files changed

+85
-2
lines changed

6 files changed

+85
-2
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ jobs:
4545
HF_REPLICATE_KEY: dummy
4646
HF_SAMBANOVA_KEY: dummy
4747
HF_TOGETHER_KEY: dummy
48+
HF_NOVITA_KEY: dummy
4849

4950
browser:
5051
runs-on: ubuntu-latest
@@ -85,6 +86,7 @@ jobs:
8586
HF_REPLICATE_KEY: dummy
8687
HF_SAMBANOVA_KEY: dummy
8788
HF_TOGETHER_KEY: dummy
89+
HF_NOVITA_KEY: dummy
8890

8991
e2e:
9092
runs-on: ubuntu-latest
@@ -152,3 +154,4 @@ jobs:
152154
HF_REPLICATE_KEY: dummy
153155
HF_SAMBANOVA_KEY: dummy
154156
HF_TOGETHER_KEY: dummy
157+
HF_NOVITA_KEY: dummy

packages/inference/README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,12 @@ Your access token should be kept private. If you need to protect it in front-end
4646

4747
You can send inference requests to third-party providers with the inference client.
4848

49-
Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
49+
Currently, we support the following providers:
50+
- [Fal.ai](https://fal.ai)
51+
- [Replicate](https://replicate.com)
52+
- [Sambanova](https://sambanova.ai)
53+
- [Together](https://together.xyz)
54+
- [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)
5055

5156
To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
5257
```ts

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { FAL_AI_API_BASE_URL } from "../providers/fal-ai";
33
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
44
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
55
import { TOGETHER_API_BASE_URL } from "../providers/together";
6+
import { NOVITA_API_BASE_URL } from "../providers/novita";
67
import type { InferenceProvider } from "../types";
78
import type { InferenceTask, Options, RequestArgs } from "../types";
89
import { isUrl } from "./isUrl";
@@ -208,6 +209,19 @@ function makeUrl(params: {
208209
}
209210
return baseUrl;
210211
}
212+
case "novita": {
213+
const baseUrl = shouldProxy
214+
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
215+
: NOVITA_API_BASE_URL;
216+
/// Novita API matches OpenAI-like APIs: model is defined in the request body
217+
if (params.taskHint === "text-generation") {
218+
if (params.chatCompletion) {
219+
return `${baseUrl}/chat/completions`;
220+
}
221+
return `${baseUrl}/completions`;
222+
}
223+
return baseUrl;
224+
}
211225
default: {
212226
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
213227
const url = params.forceTask
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
export const NOVITA_API_BASE_URL = "https://api.novita.ai/v3/openai";
2+
3+
/**
4+
* See the registered mapping of HF model ID => Novita model ID here:
5+
*
6+
* https://huggingface.co/api/partners/novita/models
7+
*
8+
* This is a publicly available mapping.
9+
*
10+
* If you want to try to run inference for a new model locally before it's registered on huggingface.co,
11+
* you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
12+
*
13+
* - If you work at Novita and want to update this mapping, please use the model mapping API we provide on huggingface.co
14+
* - If you're a community member and want to add a new supported HF model to Novita, please open an issue on the present repo
15+
* and we will tag Novita team members.
16+
*
17+
* Thanks!
18+
*/

packages/inference/src/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ export interface Options {
4444

4545
export type InferenceTask = Exclude<PipelineType, "other">;
4646

47-
export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"] as const;
47+
export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference", "novita"] as const;
4848
export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];
4949

5050
export interface BaseArgs {

packages/inference/test/HfInference.spec.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,4 +1077,47 @@ describe.concurrent("HfInference", () => {
10771077
);
10781078
});
10791079
});
1080+
1081+
describe.concurrent(
1082+
"Novita",
1083+
() => {
1084+
const client = new HfInference(env.HF_NOVITA_KEY);
1085+
1086+
it("chatCompletion", async () => {
1087+
const res = await client.chatCompletion({
1088+
model: "meta-llama/llama-3.1-8b-instruct",
1089+
provider: "novita",
1090+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1091+
});
1092+
if (res.choices && res.choices.length > 0) {
1093+
const completion = res.choices[0].message?.content;
1094+
expect(completion).toContain("two");
1095+
}
1096+
});
1097+
1098+
it("chatCompletion stream", async () => {
1099+
const stream = client.chatCompletionStream({
1100+
model: "meta-llama/llama-3.1-8b-instruct",
1101+
provider: "novita",
1102+
messages: [{ role: "user", content: "Say this is a test" }],
1103+
stream: true,
1104+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1105+
1106+
let fullResponse = "";
1107+
for await (const chunk of stream) {
1108+
if (chunk.choices && chunk.choices.length > 0) {
1109+
const content = chunk.choices[0].delta?.content;
1110+
if (content) {
1111+
fullResponse += content;
1112+
}
1113+
}
1114+
}
1115+
1116+
// Verify we got a meaningful response
1117+
expect(fullResponse).toBeTruthy();
1118+
expect(fullResponse.length).toBeGreaterThan(0);
1119+
});
1120+
},
1121+
TIMEOUT
1122+
);
10801123
});

0 commit comments

Comments (0)