@@ -1,6 +1,6 @@
 import type { InferenceSnippet, ModelDataMinimal } from "./types";
 import { describe, expect, it } from "vitest";
-import { snippetTextGeneration } from "./js";
+import { getJsInferenceSnippet } from "./js";
 
 describe("inference API snippets", () => {
 	it("conversational llm", async () => {
@@ -10,7 +10,7 @@ describe("inference API snippets", () => {
 		tags: ["conversational"],
 		inference: "",
 	};
-	const snippet = snippetTextGeneration(model, "api_token") as InferenceSnippet[];
+	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
 
 	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
 
@@ -38,14 +38,41 @@ for await (const chunk of stream) {
 }`);
 	});
 
+	it("conversational llm non-streaming", async () => {
+		const model: ModelDataMinimal = {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = getJsInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];
+
+		expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+
+const client = new HfInference("api_token")
+
+const chatCompletion = await client.chatCompletion({
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [
+		{
+			role: "user",
+			content: "What is the capital of France?"
+		}
+	],
+	max_tokens: 500
+});
+
+console.log(chatCompletion.choices[0].message);`);
+	});
+
 	it("conversational vlm", async () => {
 		const model: ModelDataMinimal = {
 			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
 			pipeline_tag: "image-text-to-text",
 			tags: ["conversational"],
 			inference: "",
 		};
-		const snippet = snippetTextGeneration(model, "api_token") as InferenceSnippet[];
+		const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
 
 		expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
 
@@ -75,6 +102,41 @@ const stream = client.chatCompletionStream({
 	max_tokens: 500
 });
 
+for await (const chunk of stream) {
+	if (chunk.choices && chunk.choices.length > 0) {
+		const newContent = chunk.choices[0].delta.content;
+		out += newContent;
+		console.log(newContent);
+	}
+}`);
+	});
+
+	it("conversational llm", async () => {
+		const model: ModelDataMinimal = {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
+
+		expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+
+const client = new HfInference("api_token")
+
+let out = "";
+
+const stream = client.chatCompletionStream({
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [
+		{
+			role: "user",
+			content: "What is the capital of France?"
+		}
+	],
+	max_tokens: 500
+});
+
 for await (const chunk of stream) {
 	if (chunk.choices && chunk.choices.length > 0) {
 		const newContent = chunk.choices[0].delta.content;
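For reference, a minimal usage sketch of the renamed helper, assuming only the call signatures exercised by the tests above (the two-argument call for the default streaming snippet and the { streaming: false } option for the non-streaming one); the model object and the "api_token" placeholder are copied straight from the test fixtures:

import type { InferenceSnippet, ModelDataMinimal } from "./types";
import { getJsInferenceSnippet } from "./js";

// Model description copied from the test fixtures above.
const model: ModelDataMinimal = {
	id: "meta-llama/Llama-3.1-8B-Instruct",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
};

// Default call: per the test expectations, yields the streaming snippet
// built around client.chatCompletionStream.
const streamingSnippets = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
console.log(streamingSnippets[0]?.content);

// With { streaming: false }: per the test expectations, yields the
// non-streaming snippet built around client.chatCompletion.
const nonStreamingSnippets = getJsInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];
console.log(nonStreamingSnippets[0]?.content);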