@@ -25,9 +25,7 @@ import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api';
 import * as images from '../../assets/inference-images.json';
 import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
 import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
-import { basename, dirname } from 'node:path';
-import { join as joinposix } from 'node:path/posix';
-import { getLocalModelFile } from '../../utils/modelsUtils';
+import { getHuggingFaceModelMountInfo } from '../../utils/modelsUtils';
 import { SECOND } from './LlamaCppPython';

 export class VLLM extends InferenceProvider {
@@ -72,14 +70,9 @@ export class VLLM extends InferenceProvider {
     // something ~/.cache/huggingface/hub/models--facebook--opt-125m/snapshots
     // modelInfo.file.path

-    const fullPath = getLocalModelFile(modelInfo);
-
-    // modelInfo.file.path must be under the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
-    const parent = dirname(fullPath);
-    const commitHash = basename(fullPath);
-    const name = basename(parent);
-    if (name !== 'snapshots') throw new Error('you must provide snapshot path for vllm');
-    const modelCache = dirname(parent);
+    // get model mount settings
+    const mountInfo = getHuggingFaceModelMountInfo(modelInfo);
+    const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';

     let connection: ContainerProviderConnection | undefined;
     if (config.connection) {
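
Note: the return shape of getHuggingFaceModelMountInfo is not shown in this diff. The sketch below is only inferred from how mount and suffix are used in the hunk above; the real definition in ../../utils/modelsUtils may differ.

// Hypothetical sketch of the helper's return shape, inferred from usage above.
interface HuggingFaceModelMountInfo {
  // Host directory to bind-mount into the container, i.e. the model's entry in the
  // Hugging Face hub cache (e.g. ~/.cache/huggingface/hub/models--facebook--opt-125m).
  mount: string;
  // Optional path of the snapshot relative to that directory (per the hub cache layout,
  // likely ending in snapshots/<commit-hash>); when absent, /cache is used directly.
  suffix?: string;
}
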
@@ -101,12 +94,12 @@
     // TRANSFORMERS_OFFLINE for legacy
     const envs: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];

-    labels['api'] = `http://localhost:${config.port}/inference`;
+    labels['api'] = `http://localhost:${config.port}/v1`;

     const mounts: MountConfig = [
       {
-        Target: `/cache/${modelInfo.id}`,
-        Source: modelCache,
+        Target: `/cache`,
+        Source: mountInfo.mount,
         Type: 'bind',
       },
     ];
@@ -137,8 +130,8 @@
       },
       Env: envs,
       Cmd: [
-        `--model=${joinposix('/cache', modelInfo.id, 'snapshots', commitHash)}`,
-        `--served_model_name=${modelInfo.file.file}`,
+        `--model=${modelCache}`,
+        `--served_model_name=${modelInfo.name}`,
         '--chat-template-content-format=openai',
       ],
     },
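
With these changes the api label advertises vLLM's OpenAI-compatible endpoint under /v1, and clients address the model by the value passed as --served_model_name. A minimal client sketch, with a hypothetical port and model name standing in for config.port and modelInfo.name:

// Hypothetical client sketch against vLLM's OpenAI-compatible API.
// Port and served model name are illustrative placeholders only.
const port = 8000;
const servedModelName = 'facebook/opt-125m';

const response = await fetch(`http://localhost:${port}/v1/chat/completions`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: servedModelName, // must match the --served_model_name flag above
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
});
console.log(await response.json());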