Commit 64a991d

Merge branch 'main' into feat/wavespeedai

2 parents 839e940 + 9af23e5

139 files changed, +867 -172 lines changed


README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -97,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 
 ```html
 <script type="module">
-  import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@4.0.0/+esm';
+  import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@4.0.2/+esm';
   import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
 </script>
 ```
````

packages/inference/package.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 {
 	"name": "@huggingface/inference",
-	"version": "4.0.0",
+	"version": "4.0.2",
 	"packageManager": "[email protected]",
 	"license": "MIT",
 	"author": "Hugging Face and Tim Mikeladze <[email protected]>",
```

packages/inference/src/package.ts

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,3 +1,3 @@
 // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
-export const PACKAGE_VERSION = "4.0.0";
+export const PACKAGE_VERSION = "4.0.2";
 export const PACKAGE_NAME = "@huggingface/inference";
```

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 91 additions & 7 deletions
```diff
@@ -14,7 +14,12 @@ import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.j
 import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
 import { templates } from "./templates.exported.js";
 
-export type InferenceSnippetOptions = { streaming?: boolean; billTo?: string } & Record<string, unknown>;
+export type InferenceSnippetOptions = {
+	streaming?: boolean;
+	billTo?: string;
+	accessToken?: string;
+	directRequest?: boolean;
+} & Record<string, unknown>;
 
 const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
 const JS_CLIENTS = ["fetch", "huggingface.js", "openai"] as const;
@@ -121,11 +126,15 @@ const HF_JS_METHODS: Partial<Record<WidgetType, string>> = {
 	translation: "translation",
 };
 
+// Placeholders to replace with env variable in snippets
+// little hack to support both direct requests and routing => routed requests should start with "hf_"
+const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
+const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
+
 // Snippet generators
 const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => {
 	return (
 		model: ModelDataMinimal,
-		accessToken: string,
 		provider: InferenceProviderOrPolicy,
 		inferenceProviderMapping?: InferenceProviderModelMapping,
 		opts?: InferenceSnippetOptions
@@ -149,13 +158,19 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 			console.error(`Failed to get provider helper for ${provider} (${task})`, e);
 			return [];
 		}
+
+		const placeholder = opts?.directRequest
+			? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
+			: ACCESS_TOKEN_ROUTING_PLACEHOLDER;
+		const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
+
 		/// Prepare inputs + make request
 		const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
 		const request = makeRequestOptionsFromResolvedModel(
 			providerModelId,
 			providerHelper,
 			{
-				accessToken,
+				accessToken: accessTokenOrPlaceholder,
 				provider,
 				...inputs,
 			} as RequestArgs,
@@ -180,7 +195,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 
 		/// Prepare template injection data
 		const params: TemplateParams = {
-			accessToken,
+			accessToken: accessTokenOrPlaceholder,
 			authorizationHeader: (request.info.headers as Record<string, string>)?.Authorization,
 			baseUrl: removeSuffix(request.url, "/chat/completions"),
 			fullUrl: request.url,
@@ -248,6 +263,11 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 			snippet = `${importSection}\n\n${snippet}`;
 		}
 
+		/// Replace access token placeholder
+		if (snippet.includes(placeholder)) {
+			snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
+		}
+
 		/// Snippet is ready!
 		return { language, client: client as string, content: snippet };
 	})
@@ -299,7 +319,6 @@ const snippets: Partial<
 	PipelineType,
 	(
 		model: ModelDataMinimal,
-		accessToken: string,
 		provider: InferenceProviderOrPolicy,
 		inferenceProviderMapping?: InferenceProviderModelMapping,
 		opts?: InferenceSnippetOptions
@@ -339,13 +358,12 @@ const snippets: Partial<
 
 export function getInferenceSnippets(
 	model: ModelDataMinimal,
-	accessToken: string,
 	provider: InferenceProviderOrPolicy,
 	inferenceProviderMapping?: InferenceProviderModelMapping,
 	opts?: Record<string, unknown>
 ): InferenceSnippet[] {
 	return model.pipeline_tag && model.pipeline_tag in snippets
-		? snippets[model.pipeline_tag]?.(model, accessToken, provider, inferenceProviderMapping, opts) ?? []
+		? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
 		: [];
 }
 
@@ -420,3 +438,69 @@ function indentString(str: string): string {
 function removeSuffix(str: string, suffix: string) {
 	return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
 }
+
+function replaceAccessTokenPlaceholder(
+	directRequest: boolean | undefined,
+	placeholder: string,
+	snippet: string,
+	language: InferenceSnippetLanguage,
+	provider: InferenceProviderOrPolicy
+): string {
+	// If "opts.accessToken" is not set, the snippets are generated with a placeholder.
+	// Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
+
+	// Determine if HF_TOKEN or specific provider token should be used
+	const useHfToken =
+		provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
+		(!directRequest && // if explicit directRequest => use provider-specific token
+			(!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
+				snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
+
+	const accessTokenEnvVar = useHfToken
+		? "HF_TOKEN" // e.g. routed request or hf-inference
+		: provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
+
+	// Replace the placeholder with the env variable
+	if (language === "sh") {
+		snippet = snippet.replace(
+			`'Authorization: Bearer ${placeholder}'`,
+			`"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
+		);
+	} else if (language === "python") {
+		snippet = "import os\n" + snippet;
+		snippet = snippet.replace(
+			`"${placeholder}"`,
+			`os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
+		);
+		snippet = snippet.replace(
+			`"Bearer ${placeholder}"`,
+			`f"Bearer {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
+		);
+		snippet = snippet.replace(
+			`"Key ${placeholder}"`,
+			`f"Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Key {os.environ['FAL_AI_API_KEY']}"
+		);
+		snippet = snippet.replace(
+			`"X-Key ${placeholder}"`,
+			`f"X-Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"X-Key {os.environ['BLACK_FOREST_LABS_API_KEY']}"
+		);
+	} else if (language === "js") {
+		snippet = snippet.replace(
+			`"${placeholder}"`,
+			`process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
+		);
+		snippet = snippet.replace(
+			`Authorization: "Bearer ${placeholder}",`,
+			`Authorization: \`Bearer $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
+		);
+		snippet = snippet.replace(
+			`Authorization: "Key ${placeholder}",`,
+			`Authorization: \`Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Key ${process.env.FAL_AI_API_KEY}`,
+		);
+		snippet = snippet.replace(
+			`Authorization: "X-Key ${placeholder}",`,
+			`Authorization: \`X-Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `X-Key ${process.env.BLACK_FOREST_LABS_AI_API_KEY}`,
+		);
+	}
+	return snippet;
+}
```
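Taken together, this removes the positional `accessToken` argument: token handling now goes through `opts`, and when no token is given the generated snippet reads it from an environment variable. A minimal sketch of how a caller might use the new signature (the import path and model object are illustrative, mirroring the test fixtures added below):

```ts
// Sketch only: assumes getInferenceSnippets is re-exported from the package root.
import { getInferenceSnippets } from "@huggingface/inference";

// Minimal model descriptor, shaped like ModelDataMinimal in the fixtures.
const model = {
	id: "meta-llama/Llama-3.1-8B-Instruct",
	pipeline_tag: "text-generation" as const,
	tags: ["conversational"],
	inference: "",
};

// No accessToken: snippets embed a placeholder that is post-processed into an
// env-variable lookup (e.g. process.env.HF_TOKEN for routed requests).
const routed = getInferenceSnippets(model, "together");

// Explicit accessToken: the literal value ends up in the generated snippet.
const withToken = getInferenceSnippets(model, "hf-inference", undefined, { accessToken: "hf_xxx" });

// directRequest: snippets use a provider-specific variable,
// e.g. TOGETHER_API_KEY instead of HF_TOKEN.
const direct = getInferenceSnippets(model, "together", undefined, { directRequest: true });
```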

packages/mcp-client/package.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -34,7 +34,7 @@
 		"prepare": "pnpm run build",
 		"test": "vitest run",
 		"check": "tsc",
-		"agent": "tsx cli.ts"
+		"cli": "tsx cli.ts"
 	},
 	"files": [
 		"src",
```

packages/mcp-client/src/Agent.ts

Lines changed: 18 additions & 8 deletions
```diff
@@ -46,6 +46,7 @@ const exitLoopTools = [taskCompletionTool, askQuestionTool];
 
 export class Agent extends McpClient {
 	private readonly servers: (ServerConfig | StdioServerParameters)[];
+	public readonly prompt: string;
 	protected messages: ChatCompletionInputMessage[];
 
 	constructor({
@@ -73,10 +74,11 @@
 		super(provider ? { provider, endpointUrl, model, apiKey } : { provider, endpointUrl, model, apiKey });
 		/// ^This shenanigan is just here to please an overzealous TS type-checker.
 		this.servers = servers;
+		this.prompt = prompt ?? DEFAULT_SYSTEM_PROMPT;
 		this.messages = [
 			{
 				role: "system",
-				content: prompt ?? DEFAULT_SYSTEM_PROMPT,
+				content: this.prompt,
 			},
 		];
 	}
@@ -86,19 +88,27 @@
 	}
 
 	async *run(
-		input: string,
+		input: string | ChatCompletionInputMessage[],
 		opts: { abortSignal?: AbortSignal } = {}
 	): AsyncGenerator<ChatCompletionStreamOutput | ChatCompletionInputMessageTool> {
-		this.messages.push({
-			role: "user",
-			content: input,
-		});
+		let messages: ChatCompletionInputMessage[];
+		if (typeof input === "string") {
+			/// Use internal array of messages
+			this.messages.push({
+				role: "user",
+				content: input,
+			});
+			messages = this.messages;
+		} else {
+			/// Use the passed messages directly
+			messages = input;
+		}
 
 		let numOfTurns = 0;
 		let nextTurnShouldCallTools = true;
 		while (true) {
 			try {
-				yield* this.processSingleTurnWithTools(this.messages, {
+				yield* this.processSingleTurnWithTools(messages, {
 					exitLoopTools,
 					exitIfFirstChunkNoTool: numOfTurns > 0 && nextTurnShouldCallTools,
 					abortSignal: opts.abortSignal,
@@ -111,7 +121,7 @@
 			}
 			numOfTurns++;
 			// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-			const currentLast = this.messages.at(-1)!;
+			const currentLast = messages.at(-1)!;
 			debug("current role", currentLast.role);
 			if (
 				currentLast.role === "tool" &&
```
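`run()` now accepts either a plain string, which is appended to the agent's internal history, or a full message array that bypasses it so the caller owns the conversation state. A rough usage sketch, with constructor options abridged and the provider/model values purely illustrative:

```ts
import { Agent } from "@huggingface/mcp-client";

// Sketch: constructor options simplified; see Agent.ts for the full parameter list.
const agent = new Agent({
	provider: "together", // illustrative provider
	model: "meta-llama/Llama-3.1-8B-Instruct",
	apiKey: process.env.HF_TOKEN,
	servers: [], // MCP servers omitted for brevity
});

// 1) String input: pushed onto the agent's internal message history.
for await (const chunk of agent.run("What tools do you have?")) {
	// ChatCompletionStreamOutput chunks and tool-call messages stream here
}

// 2) Message array: used as-is, bypassing the internal history.
for await (const chunk of agent.run([
	{ role: "system", content: agent.prompt }, // prompt is now a public field
	{ role: "user", content: "What tools do you have?" },
])) {
	// ...
}
```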

packages/mcp-client/src/McpClient.ts

Lines changed: 11 additions & 6 deletions
```diff
@@ -160,13 +160,18 @@ export class McpClient {
 			for (const toolCall of delta.tool_calls ?? []) {
 				// aggregating chunks into an encoded arguments JSON object
 				if (!finalToolCalls[toolCall.index]) {
+					/// first chunk of the tool call
 					finalToolCalls[toolCall.index] = toolCall;
-				}
-				if (finalToolCalls[toolCall.index].function.arguments === undefined) {
-					finalToolCalls[toolCall.index].function.arguments = "";
-				}
-				if (toolCall.function.arguments) {
-					finalToolCalls[toolCall.index].function.arguments += toolCall.function.arguments;
+
+					/// ensure .function.arguments is always a string
+					if (finalToolCalls[toolCall.index].function.arguments === undefined) {
+						finalToolCalls[toolCall.index].function.arguments = "";
+					}
+				} else {
+					/// any subsequent chunk to the same tool call
+					if (toolCall.function.arguments) {
+						finalToolCalls[toolCall.index].function.arguments += toolCall.function.arguments;
+					}
 				}
 			}
 			if (opts.exitIfFirstChunkNoTool && numOfChunks <= 2 && Object.keys(finalToolCalls).length === 0) {
```
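For context, this restructuring follows the usual pattern for reassembling streamed tool calls: the first delta for a given index carries the call itself (function name and possibly a first argument fragment), while every later delta for that index only appends to `function.arguments`. A self-contained illustration with simulated deltas (types simplified; not the real stream shape):

```ts
// Simplified stand-in for the streamed tool-call delta type.
type ToolCallDelta = { index: number; function: { name?: string; arguments?: string } };

const finalToolCalls: Record<number, ToolCallDelta> = {};

function aggregate(toolCall: ToolCallDelta): void {
	const existing = finalToolCalls[toolCall.index];
	if (!existing) {
		// first chunk: keep the whole delta and normalize arguments to a string
		finalToolCalls[toolCall.index] = toolCall;
		toolCall.function.arguments ??= "";
	} else if (toolCall.function.arguments) {
		// subsequent chunks: only the argument fragment matters
		existing.function.arguments = (existing.function.arguments ?? "") + toolCall.function.arguments;
	}
}

// Simulated stream: the JSON arguments arrive in three fragments.
for (const delta of [
	{ index: 0, function: { name: "get_time", arguments: '{"timez' } },
	{ index: 0, function: { arguments: 'one": "Europe' } },
	{ index: 0, function: { arguments: '/Paris"}' } },
] satisfies ToolCallDelta[]) {
	aggregate(delta);
}

console.log(finalToolCalls[0].function.arguments); // {"timezone": "Europe/Paris"}
```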

packages/tasks-gen/scripts/generate-snippets-fixtures.ts

Lines changed: 24 additions & 1 deletion
```diff
@@ -240,6 +240,30 @@ const TEST_CASES: {
 		providers: ["hf-inference"],
 		opts: { billTo: "huggingface" },
 	},
+	{
+		testName: "with-access-token",
+		task: "conversational",
+		model: {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		},
+		providers: ["hf-inference"],
+		opts: { accessToken: "hf_xxx" },
+	},
+	{
+		testName: "explicit-direct-request",
+		task: "conversational",
+		model: {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		},
+		providers: ["together"],
+		opts: { directRequest: true },
+	},
 	{
 		testName: "text-to-speech",
 		task: "text-to-speech",
@@ -314,7 +338,6 @@ function generateInferenceSnippet(
 ): InferenceSnippet[] {
 	const allSnippets = snippets.getInferenceSnippets(
 		model,
-		"api_token",
 		provider,
 		{
 			hfModelId: model.id,
```

packages/tasks-gen/snippets-fixtures/automatic-speech-recognition/js/fetch/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
```diff
@@ -3,7 +3,7 @@ async function query(data) {
 		"https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo",
 		{
 			headers: {
-				Authorization: "Bearer api_token",
+				Authorization: `Bearer ${process.env.HF_TOKEN}`,
 				"Content-Type": "audio/flac",
 			},
 			method: "POST",
```

packages/tasks-gen/snippets-fixtures/automatic-speech-recognition/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 import { InferenceClient } from "@huggingface/inference";
 
-const client = new InferenceClient("api_token");
+const client = new InferenceClient(process.env.HF_TOKEN);
 
 const data = fs.readFileSync("sample1.flac");
 
```