3 changes: 3 additions & 0 deletions .github/workflows/test.yml
@@ -46,6 +46,7 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_NOVITA_KEY: dummy
HF_FIREWORKS_KEY: dummy

browser:
@@ -88,6 +89,7 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_NOVITA_KEY: dummy
HF_FIREWORKS_KEY: dummy

e2e:
@@ -157,4 +159,5 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_NOVITA_KEY: dummy
HF_FIREWORKS_KEY: dummy
1 change: 1 addition & 0 deletions packages/inference/README.md
@@ -53,6 +53,7 @@ Currently, we support the following providers:
- [Replicate](https://replicate.com)
- [Sambanova](https://sambanova.ai)
- [Together](https://together.xyz)
- [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)

To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
```ts
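// The README's full example is collapsed in this diff view; below is a minimal
// sketch (not the exact README snippet) of passing the `provider` parameter,
// mirroring the chatCompletion call used in the tests added by this PR.
import { HfInference } from "@huggingface/inference";

const client = new HfInference("hf_..."); // your HF access token

const out = await client.chatCompletion({
	model: "meta-llama/llama-3.1-8b-instruct",
	provider: "novita",
	messages: [{ role: "user", content: "Complete this sentence: one plus one is " }],
});
console.log(out.choices[0]?.message?.content);
```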
14 changes: 14 additions & 0 deletions packages/inference/src/lib/makeRequestOptions.ts
@@ -4,6 +4,7 @@ import { NEBIUS_API_BASE_URL } from "../providers/nebius";
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
import { TOGETHER_API_BASE_URL } from "../providers/together";
import { NOVITA_API_BASE_URL } from "../providers/novita";
import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
import type { InferenceProvider } from "../types";
import type { InferenceTask, Options, RequestArgs } from "../types";
@@ -213,6 +214,7 @@ function makeUrl(params: {
}
return baseUrl;
}

case "fireworks-ai": {
const baseUrl = shouldProxy
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
@@ -222,6 +224,18 @@
}
return baseUrl;
}
case "novita": {
const baseUrl = shouldProxy
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
: NOVITA_API_BASE_URL;
if (params.taskHint === "text-generation") {
if (params.chatCompletion) {
return `${baseUrl}/chat/completions`;
}
return `${baseUrl}/completions`;
}
return baseUrl;
}
default: {
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
const url = params.forceTask
1 change: 1 addition & 0 deletions packages/inference/src/providers/consts.ts
@@ -23,4 +23,5 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
replicate: {},
sambanova: {},
together: {},
novita: {},
};
18 changes: 18 additions & 0 deletions packages/inference/src/providers/novita.ts
@@ -0,0 +1,18 @@
export const NOVITA_API_BASE_URL = "https://api.novita.ai/v3/openai";

/**
* See the registered mapping of HF model ID => Novita model ID here:
*
* https://huggingface.co/api/partners/novita/models
*
* This is a publicly available mapping.
*
* If you want to try to run inference for a new model locally before it's registered on huggingface.co,
* you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
*
* - If you work at Novita and want to update this mapping, please use the model mapping API we provide on huggingface.co
* - If you're a community member and want to add a new supported HF model to Novita, please open an issue on the present repo
* and we will tag Novita team members.
*
* Thanks!
*/
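
The comment above points to `HARDCODED_MODEL_ID_MAPPING` in consts.ts for trying a model locally before it is registered on huggingface.co. A minimal sketch of such a dev-only override (the model ID mirrors the one used in the tests below and is purely illustrative):

```ts
// Dev-only sketch: register an HF model ID => Novita model ID pair locally,
// so inference can be tried before the mapping is registered on huggingface.co.
import { HARDCODED_MODEL_ID_MAPPING } from "./consts";

HARDCODED_MODEL_ID_MAPPING["novita"] = {
	// "<HF model ID>": "<Novita model ID>"
	"meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
};
```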
2 changes: 2 additions & 0 deletions packages/inference/src/types.ts
@@ -36,7 +36,9 @@ export const INFERENCE_PROVIDERS = [
"replicate",
"sambanova",
"together",
"novita",
] as const;

export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];

export interface BaseArgs {
47 changes: 47 additions & 0 deletions packages/inference/test/HfInference.spec.ts
@@ -1175,4 +1175,51 @@ describe.concurrent("HfInference", () => {
},
TIMEOUT
);

describe.concurrent(
"Novita",
() => {
const client = new HfInference(env.HF_NOVITA_KEY);

HARDCODED_MODEL_ID_MAPPING["novita"] = {
"meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
};

it("chatCompletion", async () => {
const res = await client.chatCompletion({
model: "meta-llama/llama-3.1-8b-instruct",
provider: "novita",
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
});
if (res.choices && res.choices.length > 0) {
const completion = res.choices[0].message?.content;
expect(completion).toContain("two");
}
});

it("chatCompletion stream", async () => {
const stream = client.chatCompletionStream({
model: "meta-llama/llama-3.1-8b-instruct",
provider: "novita",
messages: [{ role: "user", content: "Say this is a test" }],
stream: true,
}) as AsyncGenerator<ChatCompletionStreamOutput>;

let fullResponse = "";
for await (const chunk of stream) {
if (chunk.choices && chunk.choices.length > 0) {
const content = chunk.choices[0].delta?.content;
if (content) {
fullResponse += content;
}
}
}

// Verify we got a meaningful response
expect(fullResponse).toBeTruthy();
expect(fullResponse.length).toBeGreaterThan(0);
});
},
TIMEOUT
);
});
40 changes: 40 additions & 0 deletions packages/inference/test/tapes.json
@@ -7025,5 +7025,45 @@
"content-type": "image/jpeg"
}
}
},
"75477fa339f5e9db4fa105cf2d0f05d61e96726386478b8e3af8c8fda94480f5": {
"url": "https://api.novita.ai/v3/openai/chat/completions",
"init": {
"headers": {
"Content-Type": "application/json"
},
"method": "POST",
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"model\":\"meta-llama/llama-3.1-8b-instruct\"}"
},
"response": {
"body": "{\"id\":\"chatcmpl-1266fc7405294c57ac180da9ee3f5e31\",\"object\":\"chat.completion\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"to two.\"},\"finish_reason\":\"stop\",\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"usage\":{\"prompt_tokens\":46,\"completion_tokens\":4,\"total_tokens\":50,\"prompt_tokens_details\":null,\"completion_tokens_details\":null},\"system_fingerprint\":\"\"}",
"status": 200,
"statusText": "OK",
"headers": {
"connection": "keep-alive",
"content-type": "application/json"
}
}
},
"55c9ad16e86e9926a473cbe89b627b343f3bb541a75adb3b94967cd1d197bd19": {
"url": "https://api.novita.ai/v3/openai/chat/completions",
"init": {
"headers": {
"Content-Type": "application/json"
},
"method": "POST",
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Say this is a test\"}],\"stream\":true,\"model\":\"meta-llama/llama-3.1-8b-instruct\"}"
},
"response": {
"body": "data: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"},\"finish_reason\":null,\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":null,\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":null,\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":null,\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":null,\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"T\"},\"finish_reason\":null,\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: {\"id\":\"chatcmpl-bbc4b318e99b4e33ab8dd76fb3fccf9c\",\"object\":\"chat.completion.chunk\",\"created\":1739366554,\"model\":\"meta-llama/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"his is only a 
test\"},\"finish_reason\":\"stop\",\"content_filter_results\":{\"hate\":{\"filtered\":false},\"self_harm\":{\"filtered\":false},\"sexual\":{\"filtered\":false},\"violence\":{\"filtered\":false},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"profanity\":{\"filtered\":false,\"detected\":false}}}],\"system_fingerprint\":\"\"}\n\ndata: [DONE]\n\n",
"status": 200,
"statusText": "OK",
"headers": {
"cache-control": "no-cache",
"connection": "keep-alive",
"content-type": "text/event-stream",
"transfer-encoding": "chunked"
}
}
}
}