
Commit f101d13

fix: rebase and align with Windows

Signed-off-by: Jeff MAURY <[email protected]>
1 parent: b7e21db

File tree: 5 files changed, +12 -18 lines

packages/backend/src/managers/playgroundV2Manager.ts

Lines changed: 1 addition & 1 deletion

@@ -256,7 +256,7 @@ export class PlaygroundV2Manager implements Disposable {

     const start = Date.now();
     streamProcessor
-      .stream(model, tools, options)
+      .stream(model, tools, server.type === InferenceType.VLLM ? {} : options)
       .consumeStream()
       .then(() => {
        this.telemetry.logUsage('playground.message.complete', {
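
The playground's request options are sampling parameters collected for llama.cpp, which a vLLM server may reject, so vLLM servers now receive an empty options object. A minimal sketch of the guard; the `ModelOptions` interface and its fields here are assumptions for illustration, not types taken from this commit:

// Hypothetical option type; the real one lives elsewhere in the codebase.
interface ModelOptions {
  temperature?: number; // assumed llama.cpp-style sampling option
  top_k?: number;       // assumed option a vLLM server may not accept
}

// Mirrors the inline ternary in the diff: vLLM servers get no extra options.
function effectiveOptions(serverType: string, options: ModelOptions): ModelOptions {
  return serverType === 'vllm' ? {} : options;
}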

packages/backend/src/models/HuggingFaceModelHandler.ts

Lines changed: 0 additions & 1 deletion

@@ -23,7 +23,6 @@ import type { CompletionEvent } from './baseEvent';
 import { getDurationSecondsSince } from '../utils/utils';
 import type { ModelsManager } from '../managers/modelsManager';
 import fs from 'node:fs/promises';
-import { dirname, basename } from 'node:path';

 function parseURL(url: string): { repo: string; revision?: string } | undefined {
   const u = URL.parse(url);

packages/backend/src/workers/provider/VLLM.ts

Lines changed: 9 additions & 16 deletions

@@ -25,9 +25,7 @@ import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api';
 import * as images from '../../assets/inference-images.json';
 import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
 import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
-import { basename, dirname } from 'node:path';
-import { join as joinposix } from 'node:path/posix';
-import { getLocalModelFile } from '../../utils/modelsUtils';
+import { getHuggingFaceModelMountInfo } from '../../utils/modelsUtils';
 import { SECOND } from './LlamaCppPython';

 export class VLLM extends InferenceProvider {
@@ -72,14 +70,9 @@ export class VLLM extends InferenceProvider {
     // something ~/.cache/huggingface/hub/models--facebook--opt-125m/snapshots
     // modelInfo.file.path

-    const fullPath = getLocalModelFile(modelInfo);
-
-    // modelInfo.file.path must be under the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
-    const parent = dirname(fullPath);
-    const commitHash = basename(fullPath);
-    const name = basename(parent);
-    if (name !== 'snapshots') throw new Error('you must provide snapshot path for vllm');
-    const modelCache = dirname(parent);
+    // get model mount settings
+    const mountInfo = getHuggingFaceModelMountInfo(modelInfo);
+    const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';

     let connection: ContainerProviderConnection | undefined;
     if (config.connection) {
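
The new helper replaces the hand-rolled path parsing deleted above. A sketch of what it plausibly does, inferred purely from how `mountInfo.mount` and `mountInfo.suffix` are used in this file; the real implementation lives in packages/backend/src/utils/modelsUtils.ts, takes the `modelInfo` object rather than a raw path, and presumably also handles Windows path separators (the point of this commit):

import { basename, dirname } from 'node:path';

// Assumed shape, inferred from usage: `mount` is the host directory to bind
// into the container, `suffix` the snapshot path relative to that directory.
interface HuggingFaceModelMountInfo {
  mount: string;
  suffix?: string;
}

// Hypothetical reimplementation for illustration only.
// HF hub cache layout: $(HF_HUB_CACHE)/models--<org>--<repo>/snapshots/<commit-hash>
function getHuggingFaceModelMountInfo(modelPath: string): HuggingFaceModelMountInfo {
  const snapshots = dirname(modelPath);
  if (basename(snapshots) !== 'snapshots') {
    throw new Error('expected a snapshot path inside the Hugging Face hub cache');
  }
  const repoDir = dirname(snapshots); // models--<org>--<repo>
  return {
    mount: dirname(repoDir), // the hub cache root, bound at /cache
    // Join with forward slashes so the in-container path stays valid even
    // when the host path is a Windows one.
    suffix: [basename(repoDir), 'snapshots', basename(modelPath)].join('/'),
  };
}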
@@ -101,12 +94,12 @@
     // TRANSFORMERS_OFFLINE for legacy
     const envs: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];

-    labels['api'] = `http://localhost:${config.port}/inference`;
+    labels['api'] = `http://localhost:${config.port}/v1`;

     const mounts: MountConfig = [
       {
-        Target: `/cache/${modelInfo.id}`,
-        Source: modelCache,
+        Target: `/cache`,
+        Source: mountInfo.mount,
         Type: 'bind',
       },
     ];
@@ -137,8 +130,8 @@
       },
       Env: envs,
       Cmd: [
-        `--model=${joinposix('/cache', modelInfo.id, 'snapshots', commitHash)}`,
-        `--served_model_name=${modelInfo.file.file}`,
+        `--model=${modelCache}`,
+        `--served_model_name=${modelInfo.name}`,
         '--chat-template-content-format=openai',
       ],
     },
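
Taken together, the container now mounts the whole hub cache at /cache (matching HF_HUB_CACHE=/cache) and points vLLM at the snapshot directory inside it, instead of bind-mounting a per-model subpath. A purely illustrative example of the values that result; the paths, commit hash, and model name below are invented:

// Invented example values; real ones come from the user's model and cache.
const mountInfo = {
  mount: '/home/user/.cache/huggingface/hub',
  suffix: 'models--facebook--opt-125m/snapshots/27dcfa74',
};
const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';

// Bind mount: host hub cache -> /cache inside the container.
const mounts = [{ Target: '/cache', Source: mountInfo.mount, Type: 'bind' }];

// vLLM loads the snapshot directory and serves it under the catalog name
// (modelInfo.name) rather than the raw file name used before.
const cmd = [
  `--model=${modelCache}`,
  '--served_model_name=opt-125m', // assumed value of modelInfo.name
  '--chat-template-content-format=openai',
];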

packages/frontend/src/pages/InferenceServerDetails.svelte

Lines changed: 1 addition & 0 deletions

@@ -56,6 +56,7 @@ const generate = async (language: string, variant: string): Promise<void> => {
   let options: RequestOptions | undefined;
   switch (service?.type) {
     case InferenceType.LLAMA_CPP:
+    case InferenceType.VLLM:
       options = {
         url: `http://localhost:${service?.connection.port || '??'}/v1/chat/completions`,
         method: 'POST',
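
vLLM exposes the same OpenAI-compatible /v1/chat/completions route as llama.cpp, which is why the two cases can share the generated snippet. A minimal request against such a server; the port and model name are placeholders, not values from this commit:

// Placeholder port and model name; use the values shown in the details page.
const response = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'opt-125m',
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
});
const completion = await response.json();
console.log(completion.choices[0].message.content);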

packages/shared/src/models/IInference.ts

Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@ const InferenceTypeLabel = {
   'llama-cpp': 'llamacpp',
   'whisper-cpp': 'whispercpp',
   openvino: 'openvino',
+  vllm: 'vLLM',
   none: 'None',
 };
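
The map is keyed by the `InferenceType` string values, so the new entry lets the UI render vLLM servers with proper capitalization. A minimal self-contained sketch of the lookup, with the map shape assumed to be a plain string record:

// Assumed shape for illustration; the real typing in IInference.ts may differ.
const labels: Record<string, string> = { vllm: 'vLLM', 'llama-cpp': 'llamacpp' };
console.log(labels['vllm']); // prints "vLLM"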
