
Commit f101d13

fix: rebase and align with Windows

Signed-off-by: Jeff MAURY <[email protected]>
1 parent: b7e21db

File tree: 5 files changed, +12 -18 lines

packages/backend/src/managers/playgroundV2Manager.ts

Lines changed: 1 addition & 1 deletion

@@ -256,7 +256,7 @@ export class PlaygroundV2Manager implements Disposable {

     const start = Date.now();
     streamProcessor
-      .stream(model, tools, options)
+      .stream(model, tools, server.type === InferenceType.VLLM ? {} : options)
       .consumeStream()
       .then(() => {
        this.telemetry.logUsage('playground.message.complete', {
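
The playground's request options are sampling parameters collected for llama.cpp, which a vLLM server may reject, so vLLM servers now receive an empty options object. A minimal sketch of the guard; the `ModelOptions` interface and its fields here are assumptions for illustration, not types taken from this commit:

// Hypothetical option type; the real one lives elsewhere in the codebase.
interface ModelOptions {
  temperature?: number; // assumed llama.cpp-style sampling option
  top_k?: number;       // assumed option a vLLM server may not accept
}

// Mirrors the inline ternary in the diff: vLLM servers get no extra options.
function effectiveOptions(serverType: string, options: ModelOptions): ModelOptions {
  return serverType === 'vllm' ? {} : options;
}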

packages/backend/src/models/HuggingFaceModelHandler.ts

Lines changed: 0 additions & 1 deletion

@@ -23,7 +23,6 @@ import type { CompletionEvent } from './baseEvent';
 import { getDurationSecondsSince } from '../utils/utils';
 import type { ModelsManager } from '../managers/modelsManager';
 import fs from 'node:fs/promises';
-import { dirname, basename } from 'node:path';

 function parseURL(url: string): { repo: string; revision?: string } | undefined {
   const u = URL.parse(url);

packages/backend/src/workers/provider/VLLM.ts

Lines changed: 9 additions & 16 deletions

@@ -25,9 +25,7 @@ import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api';
 import * as images from '../../assets/inference-images.json';
 import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
 import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
-import { basename, dirname } from 'node:path';
-import { join as joinposix } from 'node:path/posix';
-import { getLocalModelFile } from '../../utils/modelsUtils';
+import { getHuggingFaceModelMountInfo } from '../../utils/modelsUtils';
 import { SECOND } from './LlamaCppPython';

 export class VLLM extends InferenceProvider {
@@ -72,14 +70,9 @@ export class VLLM extends InferenceProvider {
     // something ~/.cache/huggingface/hub/models--facebook--opt-125m/snapshots
     // modelInfo.file.path

-    const fullPath = getLocalModelFile(modelInfo);
-
-    // modelInfo.file.path must be under the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
-    const parent = dirname(fullPath);
-    const commitHash = basename(fullPath);
-    const name = basename(parent);
-    if (name !== 'snapshots') throw new Error('you must provide snapshot path for vllm');
-    const modelCache = dirname(parent);
+    // get model mount settings
+    const mountInfo = getHuggingFaceModelMountInfo(modelInfo);
+    const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';

     let connection: ContainerProviderConnection | undefined;
     if (config.connection) {
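
The new helper replaces the hand-rolled path parsing deleted above. A sketch of what it plausibly does, inferred purely from how `mountInfo.mount` and `mountInfo.suffix` are used in this file; the real implementation lives in packages/backend/src/utils/modelsUtils.ts, takes the `modelInfo` object rather than a raw path, and presumably also handles Windows path separators (the point of this commit):

import { basename, dirname } from 'node:path';

// Assumed shape, inferred from usage: `mount` is the host directory to bind
// into the container, `suffix` the snapshot path relative to that directory.
interface HuggingFaceModelMountInfo {
  mount: string;
  suffix?: string;
}

// Hypothetical reimplementation for illustration only.
// HF hub cache layout: $(HF_HUB_CACHE)/models--<org>--<repo>/snapshots/<commit-hash>
function getHuggingFaceModelMountInfo(modelPath: string): HuggingFaceModelMountInfo {
  const snapshots = dirname(modelPath);
  if (basename(snapshots) !== 'snapshots') {
    throw new Error('expected a snapshot path inside the Hugging Face hub cache');
  }
  const repoDir = dirname(snapshots); // models--<org>--<repo>
  return {
    mount: dirname(repoDir), // the hub cache root, bound at /cache
    // Join with forward slashes so the in-container path stays valid even
    // when the host path is a Windows one.
    suffix: [basename(repoDir), 'snapshots', basename(modelPath)].join('/'),
  };
}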
@@ -101,12 +94,12 @@
     // TRANSFORMERS_OFFLINE for legacy
     const envs: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];

-    labels['api'] = `http://localhost:${config.port}/inference`;
+    labels['api'] = `http://localhost:${config.port}/v1`;

     const mounts: MountConfig = [
       {
-        Target: `/cache/${modelInfo.id}`,
-        Source: modelCache,
+        Target: `/cache`,
+        Source: mountInfo.mount,
         Type: 'bind',
       },
     ];
@@ -137,8 +130,8 @@
       },
       Env: envs,
       Cmd: [
-        `--model=${joinposix('/cache', modelInfo.id, 'snapshots', commitHash)}`,
-        `--served_model_name=${modelInfo.file.file}`,
+        `--model=${modelCache}`,
+        `--served_model_name=${modelInfo.name}`,
         '--chat-template-content-format=openai',
       ],
     },
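
Taken together, the container now mounts the whole hub cache at /cache (matching HF_HUB_CACHE=/cache) and points vLLM at the snapshot directory inside it, instead of bind-mounting a per-model subpath. A purely illustrative example of the values that result; the paths, commit hash, and model name below are invented:

// Invented example values; real ones come from the user's model and cache.
const mountInfo = {
  mount: '/home/user/.cache/huggingface/hub',
  suffix: 'models--facebook--opt-125m/snapshots/27dcfa74',
};
const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';

// Bind mount: host hub cache -> /cache inside the container.
const mounts = [{ Target: '/cache', Source: mountInfo.mount, Type: 'bind' }];

// vLLM loads the snapshot directory and serves it under the catalog name
// (modelInfo.name) rather than the raw file name used before.
const cmd = [
  `--model=${modelCache}`,
  '--served_model_name=opt-125m', // assumed value of modelInfo.name
  '--chat-template-content-format=openai',
];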

packages/frontend/src/pages/InferenceServerDetails.svelte

Lines changed: 1 addition & 0 deletions

@@ -56,6 +56,7 @@ const generate = async (language: string, variant: string): Promise<void> => {
   let options: RequestOptions | undefined;
   switch (service?.type) {
     case InferenceType.LLAMA_CPP:
+    case InferenceType.VLLM:
       options = {
         url: `http://localhost:${service?.connection.port || '??'}/v1/chat/completions`,
         method: 'POST',
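
vLLM exposes the same OpenAI-compatible /v1/chat/completions route as llama.cpp, which is why the two cases can share the generated snippet. A minimal request against such a server; the port and model name are placeholders, not values from this commit:

// Placeholder port and model name; use the values shown in the details page.
const response = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'opt-125m',
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
});
const completion = await response.json();
console.log(completion.choices[0].message.content);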

packages/shared/src/models/IInference.ts

Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@ const InferenceTypeLabel = {
   'llama-cpp': 'llamacpp',
   'whisper-cpp': 'whispercpp',
   openvino: 'openvino',
+  vllm: 'vLLM',
   none: 'None',
 };
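
The map is keyed by the `InferenceType` string values, so the new entry lets the UI render vLLM servers with proper capitalization. A minimal self-contained sketch of the lookup, with the map shape assumed to be a plain string record:

// Assumed shape for illustration; the real typing in IInference.ts may differ.
const labels: Record<string, string> = { vllm: 'vLLM', 'llama-cpp': 'llamacpp' };
console.log(labels['vllm']); // prints "vLLM"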
