 import type { ModelData } from "./model-data";
 import type { PipelineType } from "./pipelines";
+import { parseGGUFQuantLabel } from "@huggingface/gguf";
 
 export interface LocalAppSnippet {
 	/**
@@ -53,6 +54,7 @@ export type LocalApp = {
 	/**
 	 * And if not (mostly llama.cpp), snippet to copy/paste in your terminal
 	 * Supports the placeholder {{GGUF_FILE}}, which will be replaced by the gguf file path or the list of available files.
+	 * Supports the placeholder {{OLLAMA_TAG}}, which will be replaced by the list of available quant tags, or removed if the repo does not contain multiple quant files.
 	 */
 	snippet: (model: ModelData, filepath?: string) => string | string[] | LocalAppSnippet | LocalAppSnippet[];
 }
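For illustration only (not part of this diff; repo and quant names are hypothetical), the placeholder contract described above might play out like this:

const template = `ollama run hf.co/my-org/my-model-GGUF{{OLLAMA_TAG}}`;
// A repo with several quant files could be expanded by the caller into one command per tag:
//   ollama run hf.co/my-org/my-model-GGUF:Q4_K_M
//   ollama run hf.co/my-org/my-model-GGUF:Q8_0
// A repo with a single quant file would have {{OLLAMA_TAG}} stripped entirely:
//   ollama run hf.co/my-org/my-model-GGUF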
@@ -77,11 +79,18 @@ function isMarlinModel(model: ModelData): boolean {
 function isTransformersModel(model: ModelData): boolean {
 	return model.tags.includes("transformers");
 }
+function isTgiModel(model: ModelData): boolean {
+	return model.tags.includes("text-generation-inference");
+}
 
 function isLlamaCppGgufModel(model: ModelData) {
 	return !!model.gguf?.context_length;
 }
 
+function isMlxModel(model: ModelData) {
+	return model.tags.includes("mlx");
+}
+
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const command = (binary: string) =>
 		[
@@ -119,6 +128,32 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 	];
 };
 
+const snippetNodeLlamaCppCli = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
+	return [
+		{
+			title: "Chat with the model",
+			content: [
+				`npx -y node-llama-cpp chat \\`,
+				`  --model "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}" \\`,
+				`  --prompt 'Hi there!'`,
+			].join("\n"),
+		},
+		{
+			title: "Estimate the model compatibility with your hardware",
+			content: `npx -y node-llama-cpp inspect estimate "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}"`,
+		},
+	];
+};
+
+const snippetOllama = (model: ModelData, filepath?: string): string => {
+	if (filepath) {
+		const quantLabel = parseGGUFQuantLabel(filepath);
+		const ollamatag = quantLabel ? `:${quantLabel}` : "";
+		return `ollama run hf.co/${model.id}${ollamatag}`;
+	}
+	return `ollama run hf.co/${model.id}{{OLLAMA_TAG}}`;
+};
+
 const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const command = (binary: string) =>
 		["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
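As a rough sketch of the values snippetOllama produces (hypothetical model id; assumes parseGGUFQuantLabel extracts a quant label such as "Q4_K_M" from the file name):

// snippetOllama({ id: "my-org/my-model-GGUF", ... }, "my-model.Q4_K_M.gguf")
//   -> "ollama run hf.co/my-org/my-model-GGUF:Q4_K_M"
// snippetOllama({ id: "my-org/my-model-GGUF", ... })
//   -> "ollama run hf.co/my-org/my-model-GGUF{{OLLAMA_TAG}}"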
@@ -180,6 +215,34 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 	},
 	];
 };
+const snippetTgi = (model: ModelData): LocalAppSnippet[] => {
+	const runCommand = [
+		"# Call the server using curl:",
+		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
+		`	-H "Content-Type: application/json" \\`,
+		`	--data '{`,
+		`		"model": "${model.id}",`,
+		`		"messages": [`,
+		`			{"role": "user", "content": "What is the capital of France?"}`,
+		`		]`,
+		`	}'`,
+	];
+	return [
+		{
+			title: "Use Docker images",
+			setup: [
+				"# Deploy with docker on Linux:",
+				`docker run --gpus all \\`,
+				`	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
+				`	-e HF_TOKEN="<secret>" \\`,
+				`	-p 8000:80 \\`,
+				`	ghcr.io/huggingface/text-generation-inference:latest \\`,
+				`	--model-id ${model.id}`,
+			].join("\n"),
+			content: [runCommand.join("\n")],
+		},
+	];
+};
 
 /**
  * Add your new local app here.
@@ -200,6 +263,13 @@ export const LOCAL_APPS = {
 		displayOnModelPage: isLlamaCppGgufModel,
 		snippet: snippetLlamacpp,
 	},
+	"node-llama-cpp": {
+		prettyLabel: "node-llama-cpp",
+		docsUrl: "https://node-llama-cpp.withcat.ai",
+		mainTask: "text-generation",
+		displayOnModelPage: isLlamaCppGgufModel,
+		snippet: snippetNodeLlamaCppCli,
+	},
 	vllm: {
 		prettyLabel: "vLLM",
 		docsUrl: "https://docs.vllm.ai",
@@ -214,11 +284,18 @@ export const LOCAL_APPS = {
 			(model.pipeline_tag === "text-generation" || model.pipeline_tag === "image-text-to-text"),
 		snippet: snippetVllm,
 	},
+	tgi: {
+		prettyLabel: "TGI",
+		docsUrl: "https://huggingface.co/docs/text-generation-inference/",
+		mainTask: "text-generation",
+		displayOnModelPage: isTgiModel,
+		snippet: snippetTgi,
+	},
 	lmstudio: {
 		prettyLabel: "LM Studio",
 		docsUrl: "https://lmstudio.ai",
 		mainTask: "text-generation",
-		displayOnModelPage: isLlamaCppGgufModel,
+		displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isMlxModel(model),
 		deeplink: (model, filepath) =>
 			new URL(`lmstudio://open_from_hf?model=${model.id}${filepath ? `&file=${filepath}` : ""}`),
 	},
@@ -323,6 +400,13 @@ export const LOCAL_APPS = {
 		displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
 		deeplink: (model) => new URL(`https://models.invoke.ai/huggingface/${model.id}`),
 	},
+	ollama: {
+		prettyLabel: "Ollama",
+		docsUrl: "https://ollama.com",
+		mainTask: "text-generation",
+		displayOnModelPage: isLlamaCppGgufModel,
+		snippet: snippetOllama,
+	},
 } satisfies Record<string, LocalApp>;
 
 export type LocalAppKey = keyof typeof LOCAL_APPS;
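A minimal consumer-side sketch (not part of this diff; the import path and the ModelData literal are assumptions for illustration):

import { LOCAL_APPS } from "@huggingface/tasks";

// Hypothetical GGUF model carrying only the fields the predicates above actually read.
const model = { id: "my-org/my-model-GGUF", tags: ["gguf"], gguf: { context_length: 4096 } } as any;

if (LOCAL_APPS.ollama.displayOnModelPage(model)) {
	// e.g. "ollama run hf.co/my-org/my-model-GGUF:Q4_K_M", assuming the quant label parses from the file name
	console.log(LOCAL_APPS.ollama.snippet(model, "my-model.Q4_K_M.gguf"));
}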