
Commit 1d8305a

Merge branch 'main' into main

2 parents: a5b552d + 4f4e176
File tree

7 files changed (+59, -27 lines)

README.md
Lines changed: 12 additions & 12 deletions

@@ -27,7 +27,7 @@ await uploadFile({
   }
 });
 
-// Use HF Inference API, or external Inference Providers!
+// Use all supported Inference Providers!
 
 await inference.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
@@ -55,7 +55,7 @@ await inference.textToImage({
 
 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.
 
-- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use all supported (serverless) Inference Providers or switch to Inference Endpoints (dedicated) to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -128,18 +128,18 @@ import { InferenceClient } from "@huggingface/inference";
 
 const HF_TOKEN = "hf_...";
 
-const inference = new InferenceClient(HF_TOKEN);
+const client = new InferenceClient(HF_TOKEN);
 
 // Chat completion API
-const out = await inference.chatCompletion({
+const out = await client.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512
 });
 console.log(out.choices[0].message);
 
 // Streaming chat completion API
-for await (const chunk of inference.chatCompletionStream({
+for await (const chunk of client.chatCompletionStream({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512
@@ -148,14 +148,14 @@ for await (const chunk of inference.chatCompletionStream({
 }
 
 /// Using a third-party provider:
-await inference.chatCompletion({
+await client.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512,
   provider: "sambanova", // or together, fal-ai, replicate, cohere …
 })
 
-await inference.textToImage({
+await client.textToImage({
   model: "black-forest-labs/FLUX.1-dev",
   inputs: "a picture of a green bird",
   provider: "fal-ai",
@@ -164,7 +164,7 @@ await inference.textToImage({
 
 
 // You can also omit "model" to use the recommended model for the task
-await inference.translation({
+await client.translation({
   inputs: "My name is Wolfgang and I live in Amsterdam",
   parameters: {
     src_lang: "en",
@@ -173,17 +173,17 @@ await inference.translation({
 });
 
 // pass multimodal files or URLs as inputs
-await inference.imageToText({
+await client.imageToText({
   model: 'nlpconnect/vit-gpt2-image-captioning',
   data: await (await fetch('https://picsum.photos/300/300')).blob(),
 })
 
 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
-const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
-const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });
+const gpt2Client = client.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2Client.textGeneration({ inputs: 'The answer to the universe is' });
 
 // Chat Completion
-const llamaEndpoint = inference.endpoint(
+const llamaEndpoint = client.endpoint(
   "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const out = await llamaEndpoint.chatCompletion({
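
The `inference` → `client` rename in this file is purely cosmetic; the call signatures are unchanged. As a quick recap, a minimal sketch combining the renamed client with the third-party provider option (the token is a placeholder; model and provider names are taken from the diff above):

```ts
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient("hf_..."); // placeholder token, supply your own

// Chat completion routed through a third-party provider, as in the README diff.
const out = await client.chatCompletion({
  model: "meta-llama/Llama-3.1-8B-Instruct",
  messages: [{ role: "user", content: "Hello, nice to meet you!" }],
  max_tokens: 512,
  provider: "sambanova", // or together, fal-ai, replicate, cohere …
});
console.log(out.choices[0].message);
```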

packages/inference/test/InferenceClient.spec.ts
Lines changed: 2 additions & 2 deletions

@@ -376,7 +376,7 @@ describe.concurrent("InferenceClient", () => {
   );
 });
 
-it("textGeneration - gpt2", async () => {
+it.skip("textGeneration - gpt2", async () => {
   expect(
     await hf.textGeneration({
       model: "gpt2",
@@ -387,7 +387,7 @@ describe.concurrent("InferenceClient", () => {
   });
 });
 
-it("textGeneration - openai-community/gpt2", async () => {
+it.skip("textGeneration - openai-community/gpt2", async () => {
   expect(
     await hf.textGeneration({
       model: "openai-community/gpt2",

packages/ollama-utils/src/chat-template-automap.ts
Lines changed: 20 additions & 8 deletions

@@ -5,14 +5,8 @@ import type { OllamaChatTemplateMapEntry } from "./types";
 
 /**
  * Skipped these models due to error:
- * - library/qwen2:72b
- * - library/gemma3:1b
- * - library/mistral-small3.1:24b
- * - library/gemma3:12b
- * - library/gemma:2b
- * - library/mistral-nemo:12b
- * - library/hermes3:405b
- * - library/granite-code:3b
+ * - library/llama3:70b
+ * - library/mistrallite:7b
 */
 
 export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
@@ -554,6 +548,24 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
     tokens: ["<|tool_call|>", "<co>", "<|start_of_role|>", "<|end_of_role|>", "<|end_of_text|>"],
   },
 },
+{
+  model: "library/granite3.3:2b",
+  gguf: "{# Alias tools -> available_tools #}\n{%- if tools and not available_tools -%}\n {%- set available_tools = tools -%}\n{%- endif -%}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n {%- else %}\n {%- set system_message = \" Knowledge Cutoff Date: April 2024.\n Today's Date: \" + strftime_now('%B %d, %Y') + \". You are Granite, developed by IBM.\" %}\n {%- if available_tools and documents %}\n {%- set system_message = system_message + \" You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request. \nWrite the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif available_tools %}\n {%- set system_message = system_message + \" You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\" %}\n {%- elif documents %}\n {%- set system_message = system_message + \" Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif thinking %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\nRespond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query.\" %}\n {%- else %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\" %}\n {%- endif %}\n {%- if 'citations' in controls and documents %}\n {%- set system_message = system_message + ' \nUse the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}\n {%- endif %}\n {%- if 'hallucinations' in controls and documents %}\n {%- set system_message = system_message + ' \nFinally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}\n {%- endif %}\n {%- set loop_messages = messages %}\n {%- endif %}\n {{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>\n' }}\n {%- if available_tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>' }}\n {{- available_tools | tojson(indent=4) }}\n {{- '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if documents %}\n {%- for document in documents %}\n {{- '<|start_of_role|>document {\"document_id\": \"' + document['doc_id'] | string + '\"}<|end_of_role|>\n' }}\n {{- document['text'] }}\n {{- '<|end_of_text|>\n' }}\n {%- endfor %}\n {%- endif %}\n {%- for message in loop_messages %}\n {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant' }}\n {%- if controls %}\n {{- ' ' + controls | tojson()}}\n {%- endif %}\n {{- '<|end_of_role|>' }}\n {%- endif %}\n {%- endfor %}",
+  ollama: {
+    template:
+      '{{- /*\n\n------ MESSAGE PARSING ------\n\n*/}}\n{{- /*\nDeclare the prompt structure variables to be filled in from messages\n*/}}\n{{- $system := "" }}\n{{- $documents := "" }}\n{{- $documentCounter := 0 }}\n{{- $thinking := false }}\n{{- $citations := false }}\n{{- $hallucinations := false }}\n{{- $length := "" }}\n{{- $originality := "" }}\n\n{{- /*\nLoop over messages and look for a user-provided system message and documents\n*/ -}}\n{{- range .Messages }}\n\n {{- /* User defined system prompt(s) */}}\n {{- if (eq .Role "system")}}\n {{- if (ne $system "") }}\n {{- $system = print $system "\\n\\n" }}\n {{- end}}\n {{- $system = print $system .Content }}\n {{- end}}\n\n {{- /*\n NOTE: Since Ollama collates consecutive roles, for control and documents, we\n work around this by allowing the role to contain a qualifier after the\n role string.\n */ -}}\n\n {{- /* Role specified controls */ -}}\n {{- if (and (ge (len .Role) 7) (eq (slice .Role 0 7) "control")) }}\n {{- if (eq .Content "thinking")}}{{- $thinking = true }}{{- end}}\n {{- if (eq .Content "citations")}}{{- $citations = true }}{{- end}}\n {{- if (eq .Content "hallucinations")}}{{- $hallucinations = true }}{{- end}}\n {{- if (and (ge (len .Content) 7) (eq (slice .Content 0 7) "length "))}}\n {{- $length = slice .Content 7 }}\n {{- end}}\n {{- if (and (ge (len .Content) 12) (eq (slice .Content 0 12) "originality "))}}\n {{- $originality = slice .Content 12 }}\n {{- end}}\n {{- end}}\n\n {{- /* Role specified document */ -}}\n {{- if (and (ge (len .Role) 8) (eq (slice .Role 0 8) "document")) }}\n {{- if (ne $documentCounter 0)}}\n {{- $documents = print $documents "\\n\\n"}}\n {{- end}}\n {{- $identifier := ""}}\n {{- if (ge (len .Role) 9) }}\n {{- $identifier = (slice .Role 9)}}\n {{- end}}\n {{- if (eq $identifier "") }}\n {{- $identifier := print $documentCounter}}\n {{- end}}\n {{- $documents = print $documents "<|start_of_role|>document {\\"document_id\\": \\"" $identifier "\\"}<|end_of_role|>\\n" .Content "<|end_of_text|>"}}\n {{- $documentCounter = len (printf "a%*s" $documentCounter "")}}\n {{- end}}\n{{- end}}\n\n{{- /*\nIf no user message provided, build the default system message\n*/ -}}\n{{- if eq $system "" }}\n {{- $system = "Knowledge Cutoff Date: April 2024.\\nYou are Granite, developed by IBM."}}\n\n {{- /* Add Tools prompt */}}\n {{- if .Tools }}\n {{- $system = print $system " You are a helpful assistant with access to the following tools. When a tool is required to answer the user\'s query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." }}\n {{- end}}\n\n {{- /* Add documents prompt */}}\n {{- if $documents }}\n {{- if .Tools }}\n {{- $system = print $system "\\n"}}\n {{- else }}\n {{- $system = print $system " "}}\n {{- end}}\n {{- $system = print $system "Write the response to the user\'s input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." }}\n {{- if $citations}}\n {{- $system = print $system "\\nUse the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list."}}\n {{- end}}\n {{- if $hallucinations}}\n {{- $system = print $system "\\nFinally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents."}}\n {{- end}}\n {{- end}}\n\n {{- /* Prompt without tools or documents */}}\n {{- if (and (not .Tools) (not $documents)) }}\n {{- $system = print $system " You are a helpful AI assistant."}}\n {{- if $thinking}}\n {{- $system = print $system "\\nRespond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query."}}\n {{- end}}\n {{- end}}\n\n{{- end}}\n{{- /*\n\n------ TEMPLATE EXPANSION ------\n\n*/}}\n{{- /* System Prompt */ -}}\n<|start_of_role|>system<|end_of_role|>{{- $system }}<|end_of_text|>\n\n{{- /* Tools */ -}}\n{{- if .Tools }}\n<|start_of_role|>available_tools<|end_of_role|>[\n{{- range $index, $_ := .Tools }}\n{{ . }}\n{{- if and (ne (len (slice $.Tools $index)) 1) (gt (len $.Tools) 1) }},\n{{- end}}\n{{- end }}\n]<|end_of_text|>\n{{- end}}\n\n{{- /* Documents */ -}}\n{{- if $documents }}\n{{ $documents }}\n{{- end}}\n\n{{- /* Standard Messages */}}\n{{- range $index, $_ := .Messages }}\n{{- if (and\n (ne .Role "system")\n (or (lt (len .Role) 7) (ne (slice .Role 0 7) "control"))\n (or (lt (len .Role) 8) (ne (slice .Role 0 8) "document"))\n)}}\n<|start_of_role|>\n{{- if eq .Role "tool" }}tool_response\n{{- else }}{{ .Role }}\n{{- end }}<|end_of_role|>\n{{- if .Content }}{{ .Content }}\n{{- else if .ToolCalls }}<|tool_call|>\n{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}\n{{- end }}\n{{- end }}\n{{- if eq (len (slice $.Messages $index)) 1 }}\n{{- if eq .Role "assistant" }}\n{{- else }}<|end_of_text|>\n<|start_of_role|>assistant\n{{- if and (ne $length "") (ne $originality "") }} {"length": "{{ $length }}", "originality": "{{ $originality }}"}\n{{- else if ne $length "" }} {"length": "{{ $length }}"}\n{{- else if ne $originality "" }} {"originality": "{{ $originality }}"}\n{{- end }}<|end_of_role|>\n{{- end -}}\n{{- else }}<|end_of_text|>\n{{- end }}\n{{- end }}\n{{- end }}',
+    tokens: [
+      "<|tool_call|>",
+      "<think>",
+      "<response>",
+      "<|start_of_cite|>",
+      "<|end_of_cite|>",
+      "<|start_of_role|>",
+      "<|end_of_role|>",
+      "<|end_of_text|>",
+    ],
+  },
+},
 {
   model: "library/hermes3:70b",
   gguf: "{{bos_token}}{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
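
Each entry in this mapping pairs a Hugging Face Jinja template (`gguf`) with its Ollama Go-template equivalent, keyed by the Ollama model tag, so consumers can resolve a template with a linear scan. A minimal lookup sketch (the import path is an assumption for illustration; the exported constant and entry shape are as shown in the diff):

```ts
import { OLLAMA_CHAT_TEMPLATE_MAPPING } from "./chat-template-automap"; // path assumed for illustration

// Resolve the newly added granite3.3:2b entry and inspect its special tokens.
const entry = OLLAMA_CHAT_TEMPLATE_MAPPING.find((e) => e.model === "library/granite3.3:2b");
if (entry) {
  console.log(entry.ollama.tokens); // ["<|tool_call|>", "<think>", "<response>", …]
}
```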

packages/tasks-gen/scripts/inference-tgi-import.ts
Lines changed: 18 additions & 0 deletions

@@ -34,6 +34,13 @@ function toCamelCase(str: string, joiner = "") {
     .join(joiner);
 }
 
+const OVERRIDES_TYPES_RENAME_PROPERTIES: Record<string, Record<string, string>> = {
+  ChatCompletionInputFunctionDefinition: { arguments: "parameters" },
+};
+const OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE: Record<string, Record<string, unknown>> = {
+  ChatCompletionOutputFunctionDefinition: { arguments: { type: "string" } },
+};
+
 async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") {
   console.debug(`✨ Importing`, task, type);
 
@@ -57,6 +64,17 @@ async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") {
       _scan(item);
     }
   } else if (data && typeof data === "object") {
+    /// This next section can be removed when we don't use TGI as source of types.
+    if (typeof data.title === "string" && data.title in OVERRIDES_TYPES_RENAME_PROPERTIES) {
+      const [[oldName, newName]] = Object.entries(OVERRIDES_TYPES_RENAME_PROPERTIES[data.title]);
+      data.required = JSON.parse(JSON.stringify(data.required).replaceAll(oldName, newName));
+      data.properties = JSON.parse(JSON.stringify(data.properties).replaceAll(oldName, newName));
+    }
+    if (typeof data.title === "string" && data.title in OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE) {
+      const [[prop, newType]] = Object.entries(OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE[data.title]);
+      (data.properties as Record<string, unknown>)[prop] = newType;
+    }
+    /// End of overrides section
     for (const key of Object.keys(data)) {
       if (key === "$ref" && typeof data[key] === "string") {
         // Verify reference exists
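
The override added here round-trips each schema fragment through `JSON.stringify`/`JSON.parse` and renames the property with a string-level `replaceAll`, so both `required` and `properties` are rewritten in one pass. A standalone sketch of the same trick on a hypothetical schema fragment:

```ts
// Hypothetical schema fragment, for illustration only (not copied from TGI's spec).
const schema: { required: string[]; properties: Record<string, unknown> } = {
  required: ["name", "arguments"],
  properties: { arguments: {}, name: { type: "string" } },
};

const [oldName, newName] = ["arguments", "parameters"];
// Serialize, rename every occurrence of the key, and parse back.
schema.required = JSON.parse(JSON.stringify(schema.required).replaceAll(oldName, newName));
schema.properties = JSON.parse(JSON.stringify(schema.properties).replaceAll(oldName, newName));

console.log(schema.required); // ["name", "parameters"]
console.log(Object.keys(schema.properties)); // ["parameters", "name"]
```

Note that `replaceAll` on serialized JSON also matches substrings inside other keys and values, so this shortcut relies on the renamed key not occurring elsewhere in the affected type.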

packages/tasks/src/tasks/chat-completion/inference.ts
Lines changed: 2 additions & 2 deletions

@@ -130,9 +130,9 @@ export interface ChatCompletionInputToolCall {
   [property: string]: unknown;
 }
 export interface ChatCompletionInputFunctionDefinition {
-  arguments: unknown;
   description?: string;
   name: string;
+  parameters: unknown;
   [property: string]: unknown;
 }
 export interface ChatCompletionInputGrammarType {
@@ -235,7 +235,7 @@ export interface ChatCompletionOutputToolCall {
   [property: string]: unknown;
 }
 export interface ChatCompletionOutputFunctionDefinition {
-  arguments: unknown;
+  arguments: string;
   description?: string;
   name: string;
   [property: string]: unknown;
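
For consumers of these types, tool definitions on the input side now declare a `parameters` object, while tool calls on the output side return `arguments` as a JSON-encoded string that must be parsed. A hedged sketch of both sides (the weather tool is hypothetical; the import path assumes the types are re-exported from `@huggingface/tasks`):

```ts
import type {
  ChatCompletionInputFunctionDefinition,
  ChatCompletionOutputFunctionDefinition,
} from "@huggingface/tasks"; // import path assumed; interfaces as in the diff above

// Input side: the function definition now uses `parameters` instead of `arguments`.
const weatherTool: ChatCompletionInputFunctionDefinition = {
  name: "get_weather", // hypothetical tool, for illustration only
  description: "Look up the current weather for a city",
  parameters: { type: "object", properties: { city: { type: "string" } } },
};

// Output side: `arguments` is now a JSON-encoded string, so parse before use.
function decodeToolCall(fn: ChatCompletionOutputFunctionDefinition): unknown {
  return JSON.parse(fn.arguments);
}
```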

packages/tasks/src/tasks/chat-completion/spec/input.json
Lines changed: 2 additions & 2 deletions

@@ -275,9 +275,9 @@
     },
     "ChatCompletionInputFunctionDefinition": {
       "type": "object",
-      "required": ["name", "arguments"],
+      "required": ["name", "parameters"],
       "properties": {
-        "arguments": {},
+        "parameters": {},
         "description": {
           "type": "string",
           "nullable": true

packages/tasks/src/tasks/chat-completion/spec/output.json
Lines changed: 3 additions & 1 deletion

@@ -173,7 +173,9 @@
       "type": "object",
       "required": ["name", "arguments"],
       "properties": {
-        "arguments": {},
+        "arguments": {
+          "type": "string"
+        },
         "description": {
           "type": "string",
           "nullable": true
