Commit a3e8413

Merge branch 'main' into fix-inference-search-param
2 parents: e5deda7 + a444bd0

44 files changed: +435 −92 lines changed

README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -97,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 
 ```html
 <script type="module">
-	import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@4.0.0/+esm';
+	import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@4.0.3/+esm';
 	import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
 </script>
 ```
````

packages/inference/package.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 {
 	"name": "@huggingface/inference",
-	"version": "4.0.0",
+	"version": "4.0.3",
 	"packageManager": "[email protected]",
 	"license": "MIT",
 	"author": "Hugging Face and Tim Mikeladze <[email protected]>",
```

packages/inference/src/package.ts

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,3 +1,3 @@
 // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
-export const PACKAGE_VERSION = "4.0.0";
+export const PACKAGE_VERSION = "4.0.3";
 export const PACKAGE_NAME = "@huggingface/inference";
```

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 43 additions & 21 deletions
```diff
@@ -14,10 +14,12 @@ import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.js";
 import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
 import { templates } from "./templates.exported.js";
 
-export type InferenceSnippetOptions = { streaming?: boolean; billTo?: string; accessToken?: string } & Record<
-	string,
-	unknown
->;
+export type InferenceSnippetOptions = {
+	streaming?: boolean;
+	billTo?: string;
+	accessToken?: string;
+	directRequest?: boolean;
+} & Record<string, unknown>;
 
 const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
 const JS_CLIENTS = ["fetch", "huggingface.js", "openai"] as const;
@@ -124,7 +126,10 @@ const HF_JS_METHODS: Partial<Record<WidgetType, string>> = {
 	translation: "translation",
 };
 
-const ACCESS_TOKEN_PLACEHOLDER = "<ACCESS_TOKEN>"; // Placeholder to replace with env variable in snippets
+// Placeholders to replace with env variable in snippets
+// little hack to support both direct requests and routing => routed requests should start with "hf_"
+const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
+const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
 
 // Snippet generators
 const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => {
@@ -153,7 +158,11 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => {
 		console.error(`Failed to get provider helper for ${provider} (${task})`, e);
 		return [];
 	}
-	const accessTokenOrPlaceholder = opts?.accessToken ?? ACCESS_TOKEN_PLACEHOLDER;
+
+	const placeholder = opts?.directRequest
+		? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
+		: ACCESS_TOKEN_ROUTING_PLACEHOLDER;
+	const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
 
 	/// Prepare inputs + make request
 	const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
@@ -255,8 +264,8 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => {
 	}
 
 	/// Replace access token placeholder
-	if (snippet.includes(ACCESS_TOKEN_PLACEHOLDER)) {
-		snippet = replaceAccessTokenPlaceholder(snippet, language, provider);
+	if (snippet.includes(placeholder)) {
+		snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
 	}
 
 	/// Snippet is ready!
@@ -431,6 +440,8 @@ function removeSuffix(str: string, suffix: string) {
 }
 
 function replaceAccessTokenPlaceholder(
+	directRequest: boolean | undefined,
+	placeholder: string,
 	snippet: string,
 	language: InferenceSnippetLanguage,
 	provider: InferenceProviderOrPolicy
@@ -439,46 +450,57 @@ function replaceAccessTokenPlaceholder(
 	// Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
 
 	// Determine if HF_TOKEN or specific provider token should be used
-	const accessTokenEnvVar =
-		!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
-		snippet.includes("https://router.huggingface.co") || // explicit routed request => use $HF_TOKEN
-		provider == "hf-inference" // hf-inference provider => use $HF_TOKEN
-			? "HF_TOKEN"
-			: provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
+	const useHfToken =
+		provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
+		(!directRequest && // if explicit directRequest => use provider-specific token
+			(!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
+				snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
+
+	const accessTokenEnvVar = useHfToken
+		? "HF_TOKEN" // e.g. routed request or hf-inference
+		: provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
 
 	// Replace the placeholder with the env variable
 	if (language === "sh") {
 		snippet = snippet.replace(
-			`'Authorization: Bearer ${ACCESS_TOKEN_PLACEHOLDER}'`,
+			`'Authorization: Bearer ${placeholder}'`,
 			`"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
 		);
 	} else if (language === "python") {
 		snippet = "import os\n" + snippet;
 		snippet = snippet.replace(
-			`"${ACCESS_TOKEN_PLACEHOLDER}"`,
+			`"${placeholder}"`,
 			`os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
 		);
 		snippet = snippet.replace(
-			`"Bearer ${ACCESS_TOKEN_PLACEHOLDER}"`,
+			`"Bearer ${placeholder}"`,
 			`f"Bearer {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
 		);
 		snippet = snippet.replace(
-			`"Key ${ACCESS_TOKEN_PLACEHOLDER}"`,
+			`"Key ${placeholder}"`,
 			`f"Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Key {os.environ['FAL_AI_API_KEY']}"
 		);
+		snippet = snippet.replace(
+			`"X-Key ${placeholder}"`,
+			`f"X-Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"X-Key {os.environ['BLACK_FOREST_LABS_API_KEY']}"
+		);
 	} else if (language === "js") {
 		snippet = snippet.replace(
-			`"${ACCESS_TOKEN_PLACEHOLDER}"`,
+			`"${placeholder}"`,
 			`process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
 		);
 		snippet = snippet.replace(
-			`Authorization: "Bearer ${ACCESS_TOKEN_PLACEHOLDER}",`,
+			`Authorization: "Bearer ${placeholder}",`,
 			`Authorization: \`Bearer $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
 		);
 		snippet = snippet.replace(
-			`Authorization: "Key ${ACCESS_TOKEN_PLACEHOLDER}",`,
+			`Authorization: "Key ${placeholder}",`,
 			`Authorization: \`Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Key ${process.env.FAL_AI_API_KEY}`,
 		);
+		snippet = snippet.replace(
+			`Authorization: "X-Key ${placeholder}",`,
+			`Authorization: \`X-Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `X-Key ${process.env.BLACK_FOREST_LABS_API_KEY}`,
+		);
 	}
 	return snippet;
 }
```
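For context, the new flag is consumed through the options object of the exported snippet generator. A minimal sketch of a caller (assuming the `getInferenceSnippets(model, provider, inferenceProviderMapping?, opts?)` signature exposed via the package's `snippets` namespace; the model object mirrors the test fixture added below):

```ts
import { snippets } from "@huggingface/inference";

// Default (routed): generated snippets call https://router.huggingface.co
// and authenticate with HF_TOKEN. With directRequest: true, they target the
// provider's own endpoint and use a provider key such as TOGETHER_API_KEY.
const generated = snippets.getInferenceSnippets(
	{
		id: "meta-llama/Llama-3.1-8B-Instruct",
		pipeline_tag: "text-generation",
		tags: ["conversational"],
		inference: "",
	},
	"together",
	undefined, // inference provider mapping, omitted in this sketch
	{ directRequest: true }
);
```

The "hf_token_placeholder" / "not_hf_token_placeholder" pair exists so that, after template rendering, `replaceAccessTokenPlaceholder` can still tell a routed request apart from a direct one: routed tokens are expected to start with "hf_".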

packages/inference/src/snippets/templates/python/huggingface_hub/tableQuestionAnswering.jinja

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,4 +1,4 @@
-answer = client.question_answering(
+answer = client.table_question_answering(
     query="{{ inputs.asObj.query }}",
     table={{ inputs.asObj.table }},
     model="{{ model.id }}",
```

packages/mcp-client/src/Agent.ts

Lines changed: 18 additions & 8 deletions
```diff
@@ -46,6 +46,7 @@ const exitLoopTools = [taskCompletionTool, askQuestionTool];
 
 export class Agent extends McpClient {
 	private readonly servers: (ServerConfig | StdioServerParameters)[];
+	public readonly prompt: string;
 	protected messages: ChatCompletionInputMessage[];
 
 	constructor({
@@ -73,10 +74,11 @@
 		super(provider ? { provider, endpointUrl, model, apiKey } : { provider, endpointUrl, model, apiKey });
 		/// ^This shenanigan is just here to please an overzealous TS type-checker.
 		this.servers = servers;
+		this.prompt = prompt ?? DEFAULT_SYSTEM_PROMPT;
 		this.messages = [
 			{
 				role: "system",
-				content: prompt ?? DEFAULT_SYSTEM_PROMPT,
+				content: this.prompt,
 			},
 		];
 	}
@@ -86,19 +88,27 @@
 	}
 
 	async *run(
-		input: string,
+		input: string | ChatCompletionInputMessage[],
 		opts: { abortSignal?: AbortSignal } = {}
 	): AsyncGenerator<ChatCompletionStreamOutput | ChatCompletionInputMessageTool> {
-		this.messages.push({
-			role: "user",
-			content: input,
-		});
+		let messages: ChatCompletionInputMessage[];
+		if (typeof input === "string") {
+			/// Use internal array of messages
+			this.messages.push({
+				role: "user",
+				content: input,
+			});
+			messages = this.messages;
+		} else {
+			/// Use the passed messages directly
+			messages = input;
+		}
 
 		let numOfTurns = 0;
 		let nextTurnShouldCallTools = true;
 		while (true) {
 			try {
-				yield* this.processSingleTurnWithTools(this.messages, {
+				yield* this.processSingleTurnWithTools(messages, {
 					exitLoopTools,
 					exitIfFirstChunkNoTool: numOfTurns > 0 && nextTurnShouldCallTools,
 					abortSignal: opts.abortSignal,
@@ -111,7 +121,7 @@
 			}
 			numOfTurns++;
 			// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-			const currentLast = this.messages.at(-1)!;
+			const currentLast = messages.at(-1)!;
 			debug("current role", currentLast.role);
 			if (
 				currentLast.role === "tool" &&
```
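The widened `run()` signature lets callers manage conversation state themselves instead of relying on the agent's internal history. A rough sketch (provider, model, and the empty `servers` list are illustrative values; the `ChatCompletionInputMessage` import path from `@huggingface/tasks` is assumed):

```ts
import { Agent } from "@huggingface/mcp-client";
import type { ChatCompletionInputMessage } from "@huggingface/tasks";

const agent = new Agent({
	provider: "nebius",
	model: "Qwen/Qwen2.5-72B-Instruct",
	apiKey: process.env.HF_TOKEN,
	servers: [], // MCP server configs omitted in this sketch
});

// Existing behavior: a string is appended to the agent's internal history.
for await (const chunk of agent.run("Summarize the latest commit")) {
	console.log(chunk);
}

// New behavior: pass a caller-managed message array; the now-public
// `agent.prompt` lets an external history reuse the agent's system prompt.
const history: ChatCompletionInputMessage[] = [
	{ role: "system", content: agent.prompt },
	{ role: "user", content: "Summarize the latest commit" },
];
for await (const chunk of agent.run(history)) {
	console.log(chunk);
}
```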

packages/ollama-utils/package.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/ollama-utils",
 	"packageManager": "[email protected]",
-	"version": "0.0.11",
+	"version": "0.0.12",
 	"description": "Various utilities for maintaining Ollama compatibility with models on Hugging Face hub",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {
```

packages/ollama-utils/src/chat-template-automap.ts

Lines changed: 35 additions & 3 deletions
```diff
@@ -5,9 +5,14 @@ import type { OllamaChatTemplateMapEntry } from "./types";
 
 /**
  * Skipped these models due to error:
- * - library/llama4:latest
- * - library/llama3.1:latest
- * - library/granite3.2:latest
+ * - library/llama4:16x17b
+ * - library/llama3.3:latest
+ * - library/dolphin3:8b
+ * - library/qwen2-math:latest
+ * - library/qwen2-math:1.5b
+ * - library/marco-o1:latest
+ * - library/bespoke-minicheck:7b
+ * - library/falcon2:11b
  */
 
 export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
@@ -269,6 +274,33 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
 			},
 		},
 	},
+	{
+		model: "library/deepseek-r1:latest",
+		gguf: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+		ollama: {
+			template:
+				'{{- if .System }}{{ .System }}{{ end }}\n{{- range $i, $_ := .Messages }}\n{{- $last := eq (len (slice $.Messages $i)) 1}}\n{{- if eq .Role "user" }}<|User|>{{ .Content }}\n{{- else if eq .Role "assistant" }}<|Assistant|>\n {{- if and $.IsThinkSet (and $last .Thinking) -}}\n<think>\n{{ .Thinking }}\n</think>\n{{- end }}{{ .Content }}{{- if not $last }}<|end▁of▁sentence|>{{- end }}\n{{- end }}\n{{- if and $last (ne .Role "assistant") }}<|Assistant|>\n{{- if and $.IsThinkSet (not $.Think) -}}\n<think>\n\n</think>\n\n{{ end }}\n{{- end -}}\n{{- end }}',
+			tokens: [
+				"<|User|>",
+				"<|Assistant|>",
+				"<|tool▁outputs▁end|>",
+				"<|tool▁calls▁begin|>",
+				"<|tool▁call▁begin|>",
+				"<|tool▁sep|>",
+				"<|tool▁call▁end|>",
+				"<|tool▁calls▁end|>",
+				"<|end▁of▁sentence|>",
+				"<|tool▁outputs▁begin|>",
+				"<|tool▁output▁begin|>",
+				"<|tool▁output▁end|>",
+			],
+			params: {
+				stop: ["<|begin▁of▁sentence|>", "<|end▁of▁sentence|>", "<|User|>", "<|Assistant|>"],
+				temperature: 0.6,
+				top_p: 0.95,
+			},
+		},
+	},
 	{
 		model: "library/deepseek-v2.5:236b",
 		gguf: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
```
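For downstream consumers, a sketch of looking up the new entry (assuming `OLLAMA_CHAT_TEMPLATE_MAPPING` is re-exported from the package root; otherwise import it from `src/chat-template-automap`):

```ts
import { OLLAMA_CHAT_TEMPLATE_MAPPING } from "@huggingface/ollama-utils";

// Each entry maps an Ollama model tag to the GGUF (Jinja) chat template it
// was derived from, plus the equivalent Go-style Ollama template, its
// special tokens, and default sampling params.
const entry = OLLAMA_CHAT_TEMPLATE_MAPPING.find(
	(e) => e.model === "library/deepseek-r1:latest"
);
if (entry) {
	console.log(entry.ollama.params?.stop); // ["<|begin▁of▁sentence|>", ...]
	console.log(entry.ollama.tokens.length); // 12 special tokens
}
```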

packages/tasks-gen/scripts/generate-snippets-fixtures.ts

Lines changed: 12 additions & 0 deletions
```diff
@@ -252,6 +252,18 @@ const TEST_CASES: {
 		providers: ["hf-inference"],
 		opts: { accessToken: "hf_xxx" },
 	},
+	{
+		testName: "explicit-direct-request",
+		task: "conversational",
+		model: {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		},
+		providers: ["together"],
+		opts: { directRequest: true },
+	},
 	{
 		testName: "text-to-speech",
 		task: "text-to-speech",
```

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,8 +1,8 @@
 import { OpenAI } from "openai";
 
 const client = new OpenAI({
-	baseURL: "https://api.together.xyz/v1",
-	apiKey: process.env.TOGETHER_API_KEY,
+	baseURL: "https://router.huggingface.co/together/v1",
+	apiKey: process.env.HF_TOKEN,
 });
 
 const chatCompletion = await client.chat.completions.create({
```
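By contrast, the new `explicit-direct-request` fixture should pin the provider-direct variant that this file previously contained, along these lines (a sketch of the expected output, not the generated file itself):

```ts
import { OpenAI } from "openai";

// Direct request: talk to Together's own endpoint with the provider API key,
// instead of routing through router.huggingface.co with HF_TOKEN.
const client = new OpenAI({
	baseURL: "https://api.together.xyz/v1",
	apiKey: process.env.TOGETHER_API_KEY,
});
```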
