Commit 42f25b0

feat: add OpenVINO inference provider (#2848)
* feat: add OpenVINO inference provider
  Fixes #2847
  Signed-off-by: Jeff MAURY <[email protected]>
1 parent ac99c7c commit 42f25b0

20 files changed: +687 -30 lines changed

packages/backend/src/assets/ai.json

Lines changed: 9 additions & 0 deletions
@@ -517,6 +517,15 @@
       },
       "memory": 4372811936,
       "backend": "llama-cpp"
+    },
+    {
+      "id": "OpenVINO/mistral-7B-instruct-v0.2-int4-ov",
+      "name": "OpenVINO/mistral-7B-instruct-v0.2-int4-ov",
+      "description": "# Mistral-7B-Instruct-v0.2-int4-ov\n* Model creator: [Mistral AI](https://huggingface.co/mistralai)\n * Original model: [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)\n\n## Description\n\nThis is [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) model converted to the [OpenVINO™ IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) (Intermediate Representation) format.\n\n## Compatibility\n\nThe provided OpenVINO™ IR model is compatible with:\n\n* OpenVINO version 2024.2.0 and higher\n* Optimum Intel 1.19.0 and higher\n\n## Running Model Inference with [Optimum Intel](https://huggingface.co/docs/optimum/intel/index)\n\n\n1. Install packages required for using [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) integration with the OpenVINO backend:\n\n```\npip install optimum[openvino]\n```\n\n2. Run model inference:\n\n```\nfrom transformers import AutoTokenizer\nfrom optimum.intel.openvino import OVModelForCausalLM\n\nmodel_id = \"OpenVINO/<model_name>\"\ntokenizer = AutoTokenizer.from_pretrained(model_id)\nmodel = OVModelForCausalLM.from_pretrained(model_id)\n\ninputs = tokenizer(\"What is OpenVINO?\", return_tensors=\"pt\")\n\noutputs = model.generate(**inputs, max_length=200)\ntext = tokenizer.batch_decode(outputs)[0]\nprint(text)\n```\n\nFor more examples and possible optimizations, refer to the [OpenVINO Large Language Model Inference Guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).\n\n## Running Model Inference with [OpenVINO GenAI](https://github.com/openvinotoolkit/openvino.genai)\n\n1. Install packages required for using OpenVINO GenAI.\n```\npip install openvino-genai huggingface_hub\n```\n\n2. Download model from HuggingFace Hub\n \n```\nimport huggingface_hub as hf_hub\n\nmodel_id = \"OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov\"\nmodel_path = \"Mistral-7B-Instruct-v0.2-int4-ov\"\n\nhf_hub.snapshot_download(model_id, local_dir=model_path)\n\n```\n\n3. Run model inference:\n\n```\nimport openvino_genai as ov_genai\n\ndevice = \"CPU\"\npipe = ov_genai.LLMPipeline(model_path, device)\nprint(pipe.generate(\"What is OpenVINO?\", max_length=200))\n```\n\nMore GenAI usage examples can be found in OpenVINO GenAI library [docs](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md) and [samples](https://github.com/openvinotoolkit/openvino.genai?tab=readme-ov-file#openvino-genai-samples)\n\n## Limitations\n\nCheck the original model card for [limitations](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2#limitations).\n\n## Legal information\n\nThe original model is distributed under [apache-2.0](https://choosealicense.com/licenses/apache-2.0/) license. More details can be found in [original model card](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2).\n\n## Disclaimer\n\nIntel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights.",
+      "registry": "Hugging Face",
+      "license": "Apache-2.0",
+      "url": "huggingface:/OpenVINO/mistral-7B-instruct-v0.2-int4-ov",
+      "backend": "openvino"
     }
   ],
   "categories": [

packages/backend/src/assets/inference-images.json

Lines changed: 3 additions & 0 deletions
@@ -5,5 +5,8 @@
   "llamacpp": {
     "default": "quay.io/ramalama/ramalama-llama-server@sha256:4e56101073e0bd6f2f2e15839b64315656d0dbfc1331a3385f2ae722e13f2279",
     "cuda": "quay.io/ramalama/cuda-llama-server@sha256:56efc824e5b3ae6a6a11e9537ed9e2ac05f9f9fc6f2e81a55eb67b662c94fe95"
+  },
+  "openvino": {
+    "default": "quay.io/ramalama/openvino@sha256:670d91cc322933cc4263606459317cd4ca3fcfb16d59a46b11dcd498c2cd7cb5"
   }
 }
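
This file maps each backend to one or more container images. A hypothetical lookup helper (not part of this commit; the function name, the `variant` parameter, and the import path are assumptions) could resolve the image like this:

```typescript
// Illustrative only: pick the container image for a backend/variant pair.
// The JSON shape mirrors inference-images.json above; the helper itself is
// an assumption, not code from this commit.
import images from '../assets/inference-images.json';

type ImageVariants = Record<string, string>; // e.g. { "default": "...", "cuda": "..." }

function getInferenceImage(backend: string, variant = 'default'): string {
  const variants = (images as Record<string, ImageVariants>)[backend];
  if (!variants) throw new Error(`no inference image registered for backend "${backend}"`);
  return variants[variant] ?? variants['default'];
}

// getInferenceImage('openvino')          -> the quay.io/ramalama/openvino image above
// getInferenceImage('llamacpp', 'cuda')  -> the quay.io/ramalama/cuda-llama-server image
```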

packages/backend/src/managers/inference/inferenceManager.spec.ts

Lines changed: 1 addition & 0 deletions
@@ -288,6 +288,7 @@ describe('Create Inference Server', () => {
       enabled: () => true,
       name: 'dummy-inference-provider',
       dispose: () => {},
+      prePerform: vi.fn().mockReturnValue(Promise.resolve()),
       perform: vi.fn<() => InferenceServer>().mockResolvedValue({
         container: {
           containerId: 'dummy-container-id',

packages/backend/src/managers/inference/inferenceManager.ts

Lines changed: 2 additions & 0 deletions
@@ -193,6 +193,8 @@ export class InferenceManager extends Publisher<InferenceServer[]> implements Di

     if (!connection) throw new Error('cannot find running container provider connection');

+    await provider.prePerform(config);
+
     // upload models to podman machine if user system is supported
     config.modelsInfo = await Promise.all(
       config.modelsInfo.map(modelInfo =>
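
The new `prePerform` call gives every provider a chance to do preparation work before the server container is created; the spec above mocks it with `vi.fn().mockReturnValue(Promise.resolve())`. The provider contract itself is not shown in this excerpt; a hedged sketch of the hook, inferred only from the call site and the mock:

```typescript
// Sketch only: the real provider interface lives elsewhere in the repository.
// prePerform's signature is inferred from `await provider.prePerform(config)`
// in InferenceManager and the mocked provider in inferenceManager.spec.ts.
interface IInferenceProvider {
  name: string;
  enabled(): boolean;
  dispose(): void;
  prePerform(config: InferenceServerConfig): Promise<void>; // hook added by this commit
  perform(config: InferenceServerConfig): Promise<InferenceServer>;
}
```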

packages/backend/src/managers/modelsManager.spec.ts

Lines changed: 1 addition & 1 deletion
@@ -658,7 +658,7 @@ describe('deleting models', () => {
       'machine-2',
       'rm',
       '-f',
-      '/home/user/ai-lab/models/dummyFile',
+      '/home/user/ai-lab/models/model-id-1',
     ]);
   });
 });

packages/backend/src/managers/playgroundV2Manager.spec.ts

Lines changed: 6 additions & 0 deletions
@@ -206,13 +206,15 @@ test('valid submit should create IPlaygroundMessage and notify the webview', asy
       {
         id: 'dummyModelId',
         file: {
+          path: '.',
           file: 'dummyModelFile',
         },
       },
     ],
     connection: {
       port: 8888,
     },
+    labels: [],
   } as unknown as InferenceServer,
 ]);
 // @ts-expect-error the mocked return value is just a partial of the real OpenAI provider
@@ -284,13 +286,15 @@ test('error', async () => {
       {
         id: 'dummyModelId',
         file: {
+          path: '.',
           file: 'dummyModelFile',
         },
       },
     ],
     connection: {
       port: 8888,
     },
+    labels: [],
   } as unknown as InferenceServer,
 ]);
 const doStream: LanguageModelV1['doStream'] = async () => {
@@ -685,13 +689,15 @@ describe('system prompt', () => {
       {
         id: 'dummyModelId',
         file: {
+          path: '.',
           file: 'dummyModelFile',
         },
       },
     ],
     connection: {
       port: 8888,
     },
+    labels: [],
   } as unknown as InferenceServer,
 ]);
 // @ts-expect-error the mocked return value is just a partial of the real OpenAI provider

packages/backend/src/managers/playgroundV2Manager.ts

Lines changed: 2 additions & 2 deletions
@@ -218,7 +218,7 @@ export class PlaygroundV2Manager implements Disposable {
       timestamp: Date.now(),
     } as UserChat);

-    if (!modelInfo.file?.file) throw new Error('model info has undefined file.');
+    if (!modelInfo.file?.path) throw new Error('model info has undefined file.');

     const telemetry: Record<string, unknown> = {
       conversationId: conversationId,
@@ -243,7 +243,7 @@

     const openAiClient = createOpenAICompatible({
       name: modelInfo.name,
-      baseURL: `http://localhost:${server.connection.port}/v1`,
+      baseURL: server.labels['api'] ?? `http://localhost:${server.connection.port}/v1`,
     });
     let model = openAiClient(modelInfo.name);
     // Tool calling in OpenAI doesn't support streaming yet
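
The `labels['api']` lookup lets an inference server advertise its own OpenAI-compatible base URL instead of the hard-coded `http://localhost:<port>/v1`; when no such label is set, the previous behaviour is preserved. A small illustration of the fallback (the label value here is invented):

```typescript
// Illustration of the fallback above; the 'api' label value is made up.
const server = {
  connection: { port: 8888 },
  labels: { api: 'http://localhost:9000/v3' } as Record<string, string>,
};

const baseURL = server.labels['api'] ?? `http://localhost:${server.connection.port}/v1`;
// with the label:    'http://localhost:9000/v3'
// without the label: 'http://localhost:8888/v1'
```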

packages/backend/src/models/HuggingFaceModelHandler.ts

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ export class HuggingFaceModelHandler extends ModelHandler {
     if (model) {
       model.model.file = {
         path: revision.path,
-        file: revision.path,
+        file: '',
         creation: revision.lastModifiedAt,
         size: revision.size,
       };

packages/backend/src/studio.ts

Lines changed: 9 additions & 0 deletions
@@ -62,6 +62,8 @@ import { HuggingFaceModelHandler } from './models/HuggingFaceModelHandler';
 import { LlamaStackApiImpl } from './llama-stack-api-impl';
 import { LLAMA_STACK_API_CHANNEL, type LlamaStackAPI } from '@shared/LlamaStackAPI';
 import { LlamaStackManager } from './managers/llama-stack/llamaStackManager';
+import { OpenVINO } from './workers/provider/OpenVINO';
+import os from 'node:os';

 export class Studio {
   readonly #extensionContext: ExtensionContext;
@@ -280,6 +282,13 @@
     this.#extensionContext.subscriptions.push(
       this.#inferenceProviderRegistry.register(new WhisperCpp(this.#taskRegistry, this.#podmanConnection)),
     );
+    if (os.arch() === 'x64') {
+      this.#extensionContext.subscriptions.push(
+        this.#inferenceProviderRegistry.register(
+          new OpenVINO(this.#taskRegistry, this.#podmanConnection, this.#modelsManager, this.#configurationRegistry),
+        ),
+      );
+    }

     /**
      * The inference manager create, stop, manage Inference servers
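
The OpenVINO worker registered here (packages/backend/src/workers/provider/OpenVINO.ts) ships with this commit but is not included in this excerpt. Judging only from the registration call and the new prePerform hook, its skeleton plausibly resembles the sketch below; the constructor parameters mirror the call in studio.ts, everything else is an assumption:

```typescript
// Hypothetical skeleton only, reusing the IInferenceProvider sketch above.
// Constructor parameters mirror `new OpenVINO(taskRegistry, podmanConnection,
// modelsManager, configurationRegistry)` in studio.ts; all bodies are assumptions.
export class OpenVINO implements IInferenceProvider {
  readonly name = 'OpenVINO';

  constructor(
    private taskRegistry: TaskRegistry,
    private podmanConnection: PodmanConnection,
    private modelsManager: ModelsManager,
    private configurationRegistry: ConfigurationRegistry,
  ) {}

  enabled(): boolean {
    // studio.ts already guards registration with os.arch() === 'x64'
    return true;
  }

  async prePerform(_config: InferenceServerConfig): Promise<void> {
    // assumption: stage the OpenVINO IR model directory before the container starts
  }

  async perform(_config: InferenceServerConfig): Promise<InferenceServer> {
    // assumption: run the quay.io/ramalama/openvino image from inference-images.json
    throw new Error('sketch only');
  }

  dispose(): void {}
}
```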

packages/backend/src/utils/modelsUtils.spec.ts

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ describe('getRemoteModelFile', () => {
       },
     } as unknown as ModelInfo);

-    expect(path).toBe(posix.join(MACHINE_BASE_FOLDER, 'dummy.guff'));
+    expect(path).toBe(posix.join(MACHINE_BASE_FOLDER, 'dummyModelId'));
   });
 });
141141
