/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import { InferenceProvider } from './InferenceProvider';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { PodmanConnection } from '../../managers/podmanConnection';
import { type InferenceServer, InferenceType } from '@shared/models/IInference';
import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig';
import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api';
import * as images from '../../assets/inference-images.json';
import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
import { basename, dirname } from 'node:path';
import { join as joinposix } from 'node:path/posix';
import { getLocalModelFile } from '../../utils/modelsUtils';
import { SECOND } from './LlamaCppPython';

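/**
 * InferenceProvider that runs a vLLM server in a container, serving a single
 * model taken from the local Hugging Face hub cache.
 *
 * Rough usage sketch (assuming a valid InferenceServerConfig is already built):
 *
 *   const vllm = new VLLM(taskRegistry, podmanConnection);
 *   const server = await vllm.perform(config);
 */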
export class VLLM extends InferenceProvider {
  constructor(
    taskRegistry: TaskRegistry,
    private podmanConnection: PodmanConnection,
  ) {
    super(taskRegistry, InferenceType.VLLM, 'vllm');
  }

  dispose(): void {}

  public enabled = (): boolean => true;

  /**
   * Example invocation of the container this provider creates:
   *
   * podman run -it --rm
   *   -v C:\Users\axels\.cache\huggingface\hub\models--mistralai--Mistral-7B-v0.1:/cache/models--mistralai--Mistral-7B-v0.1
   *   -e HF_HUB_CACHE=/cache
   *   localhost/vllm-cpu-env:latest
   *   --model=/cache/models--mistralai--Mistral-7B-v0.1/snapshots/7231864981174d9bee8c7687c24c8344414eae6b
   *
   * @param config the inference server configuration to apply
   */
  override async perform(config: InferenceServerConfig): Promise<InferenceServer> {
    if (config.modelsInfo.length !== 1)
      throw new Error(`only one model is supported, received ${config.modelsInfo.length}`);

    const modelInfo = config.modelsInfo[0];
    if (modelInfo.backend !== InferenceType.VLLM) {
      throw new Error(`VLLM requires models with backend type ${InferenceType.VLLM}, got ${modelInfo.backend}.`);
    }

    if (modelInfo.file === undefined) {
      throw new Error('The model info file provided is undefined');
    }

    console.log('[VLLM]', config);
    console.log('[VLLM] modelInfo.file', modelInfo.file.path);

    // modelInfo.file.path points inside the Hugging Face hub cache,
    // e.g. ~/.cache/huggingface/hub/models--facebook--opt-125m/snapshots

    const fullPath = getLocalModelFile(modelInfo);

    // modelInfo.file.path must be of the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
    const parent = dirname(fullPath);
    const commitHash = basename(fullPath);
    const name = basename(parent);
    if (name !== 'snapshots') throw new Error('you must provide a snapshot path for vLLM');
    const modelCache = dirname(parent);
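    // Illustration with a hypothetical cache entry:
    //   fullPath   = ~/.cache/huggingface/hub/models--facebook--opt-125m/snapshots/<commit-hash>
    //   commitHash = <commit-hash>
    //   parent     = .../models--facebook--opt-125m/snapshots
    //   modelCache = .../models--facebook--opt-125m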

    let connection: ContainerProviderConnection | undefined;
    if (config.connection) {
      connection = this.podmanConnection.getContainerProviderConnection(config.connection);
    } else {
      connection = this.podmanConnection.findRunningContainerProviderConnection();
    }
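    // a connection explicitly named in the config takes precedence; otherwise the
    // first running container provider connection found is used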

    if (!connection) throw new Error('no running connection could be found');

    const labels: Record<string, string> = {
      ...config.labels,
      [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)),
    };
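    // the LABEL_INFERENCE_SERVER label carries the served model ids so the rest of
    // the application can recognize this container as an inference server for them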

    const imageInfo = await this.pullImage(connection, config.image ?? images.vllm.default, labels);
    // https://huggingface.co/docs/transformers/main/en/installation#offline-mode
    // HF_HUB_OFFLINE for current releases, TRANSFORMERS_OFFLINE for legacy ones
    const envs: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];
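    // with offline mode enabled the server never tries to reach huggingface.co and
    // only reads the model files bind-mounted into /cache below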

    labels['api'] = `http://localhost:${config.port}/inference`;

    const mounts: MountConfig = [
      {
        Target: `/cache/${modelInfo.id}`,
        Source: modelCache,
        Type: 'bind',
      },
    ];
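    // the bind mount exposes the host model cache as /cache/<model-id> inside the
    // container, matching the HF_HUB_CACHE=/cache layout set in the environment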

    const containerInfo = await this.createContainer(
      imageInfo.engineId,
      {
        Image: imageInfo.Id,
        Detach: true,
        Labels: labels,
        HostConfig: {
          AutoRemove: false,
          Mounts: mounts,
          PortBindings: {
            '8000/tcp': [
              {
                HostPort: `${config.port}`,
              },
            ],
          },
          SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION],
        },
        HealthCheck: {
          // must be the port INSIDE the container, not the exposed one
          Test: ['CMD-SHELL', `curl -sSf localhost:8000/version > /dev/null`],
          Interval: SECOND * 5,
          Retries: 4 * 5,
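          // 4 * 5 = 20 retries at a 5-second interval: roughly 100 seconds for the
          // server to report healthy before the container is marked unhealthy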
        },
        Env: envs,
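        // vLLM arguments: --model points at the snapshot directory as seen from inside
        // the container, --served_model_name is the name clients use to query the server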
        Cmd: [
          `--model=${joinposix('/cache', modelInfo.id, 'snapshots', commitHash)}`,
          `--served_model_name=${modelInfo.file.file}`,
          '--chat-template-content-format=openai',
        ],
      },
      labels,
    );

    return {
      models: [modelInfo],
      status: 'running',
      connection: {
        port: config.port,
      },
      container: {
        containerId: containerInfo.id,
        engineId: containerInfo.engineId,
      },
      type: InferenceType.VLLM,
      labels: labels,
    };
  }
}