diff --git a/packages/backend/package.json b/packages/backend/package.json
index 4b763bd8f..e61604567 100644
--- a/packages/backend/package.json
+++ b/packages/backend/package.json
@@ -51,6 +51,16 @@
           "maximum": 65535,
           "description": "Port on which the API is listening (requires restart of extension)"
         },
+        "ai-lab.inferenceRuntime": {
+          "type": "string",
+          "enum": [
+            "all",
+            "llama-cpp",
+            "whisper-cpp",
+            "none"
+          ],
+          "description": "Choose the default inferencing runtime for AI Lab"
+        },
         "ai-lab.experimentalTuning": {
           "type": "boolean",
           "default": false,
diff --git a/packages/backend/src/managers/modelsManager.spec.ts b/packages/backend/src/managers/modelsManager.spec.ts
index 138ebe5ec..a171b7b68 100644
--- a/packages/backend/src/managers/modelsManager.spec.ts
+++ b/packages/backend/src/managers/modelsManager.spec.ts
@@ -141,6 +141,7 @@ beforeEach(() => {
     modelsPath: '~/downloads',
     experimentalTuning: false,
     apiPort: 0,
+    inferenceRuntime: 'llama-cpp',
     experimentalGPU: false,
     showGPUPromotion: false,
     appearance: 'dark',
@@ -1007,6 +1008,7 @@ describe('uploadModelToPodmanMachine', () => {
     modelsPath: '~/downloads',
     experimentalTuning: false,
     apiPort: 0,
+    inferenceRuntime: 'llama-cpp',
     experimentalGPU: false,
     showGPUPromotion: false,
     appearance: 'dark',
diff --git a/packages/backend/src/registries/ConfigurationRegistry.ts b/packages/backend/src/registries/ConfigurationRegistry.ts
index 25d4ac057..e33ee60de 100644
--- a/packages/backend/src/registries/ConfigurationRegistry.ts
+++ b/packages/backend/src/registries/ConfigurationRegistry.ts
@@ -26,6 +26,7 @@ const CONFIGURATION_SECTIONS: string[] = [
   'models.path',
   'experimentalGPU',
   'apiPort',
+  'inferenceRuntime',
   'experimentalTuning',
   'modelUploadDisabled',
   'showGPUPromotion',
@@ -54,6 +55,7 @@ export class ConfigurationRegistry extends Publisher<ExtensionConfiguration> imp
       modelsPath: this.getModelsPath(),
       experimentalGPU: this.#configuration.get<boolean>('experimentalGPU') ?? false,
       apiPort: this.#configuration.get<number>('apiPort') ?? API_PORT_DEFAULT,
+      inferenceRuntime: this.#configuration.get<string>('inferenceRuntime') ?? 'all',
       experimentalTuning: this.#configuration.get<boolean>('experimentalTuning') ?? false,
       modelUploadDisabled: this.#configuration.get<boolean>('modelUploadDisabled') ?? false,
       showGPUPromotion: this.#configuration.get<boolean>('showGPUPromotion') ?? true,
diff --git a/packages/backend/src/workers/provider/LlamaCppPython.spec.ts b/packages/backend/src/workers/provider/LlamaCppPython.spec.ts
index badd86026..f4c730e4b 100644
--- a/packages/backend/src/workers/provider/LlamaCppPython.spec.ts
+++ b/packages/backend/src/workers/provider/LlamaCppPython.spec.ts
@@ -97,6 +97,7 @@ beforeEach(() => {
     experimentalGPU: false,
     modelsPath: 'model-path',
     apiPort: 10434,
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     modelUploadDisabled: false,
     showGPUPromotion: false,
@@ -278,6 +279,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -321,6 +323,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -369,6 +372,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -417,6 +421,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -453,6 +458,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -498,6 +504,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -537,6 +544,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
@@ -581,6 +589,7 @@ describe('perform', () => {
       experimentalGPU: true,
       modelsPath: '',
       apiPort: 10434,
+      inferenceRuntime: 'llama-cpp',
       experimentalTuning: false,
       modelUploadDisabled: false,
       showGPUPromotion: false,
diff --git a/packages/backend/src/workers/provider/OpenVINO.spec.ts b/packages/backend/src/workers/provider/OpenVINO.spec.ts
index e6a56393c..eac1b1a49 100644
--- a/packages/backend/src/workers/provider/OpenVINO.spec.ts
+++ b/packages/backend/src/workers/provider/OpenVINO.spec.ts
@@ -95,6 +95,7 @@ beforeEach(() => {
     experimentalGPU: false,
     modelsPath: 'model-path',
     apiPort: 10434,
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     modelUploadDisabled: false,
     showGPUPromotion: false,
diff --git a/packages/frontend/src/lib/notification/ContainerConnectionWrapper.spec.ts b/packages/frontend/src/lib/notification/ContainerConnectionWrapper.spec.ts
index 7555ef05a..c25d8e663 100644
--- a/packages/frontend/src/lib/notification/ContainerConnectionWrapper.spec.ts
+++ b/packages/frontend/src/lib/notification/ContainerConnectionWrapper.spec.ts
@@ -68,6 +68,7 @@ beforeEach(() => {
     apiPort: 0,
     experimentalTuning: false,
     modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
     modelUploadDisabled: false,
     showGPUPromotion: false,
     appearance: 'dark',
diff --git a/packages/frontend/src/lib/notification/GPUPromotion.spec.ts b/packages/frontend/src/lib/notification/GPUPromotion.spec.ts
index 795bac63d..b543e2017 100644
--- a/packages/frontend/src/lib/notification/GPUPromotion.spec.ts
+++ b/packages/frontend/src/lib/notification/GPUPromotion.spec.ts
@@ -45,6 +45,7 @@ const mockConfiguration: Writable<ExtensionConfiguration> = writable({
   experimentalGPU: false,
   modelsPath: '',
   apiPort: -1,
+  inferenceRuntime: 'llama-cpp',
   modelUploadDisabled: false,
   experimentalTuning: false,
   showGPUPromotion: false,
@@ -64,6 +65,7 @@ test('should show banner if gpu support if off and gpu promotion on', async () =
     showGPUPromotion: true,
     modelUploadDisabled: false,
     modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     apiPort: -1,
     appearance: 'dark',
@@ -85,6 +87,7 @@ test('should not show banner if gpu support if on and gpu promotion on', async (
     showGPUPromotion: true,
     modelUploadDisabled: false,
     modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     apiPort: -1,
     appearance: 'dark',
@@ -106,6 +109,7 @@ test('should not show banner if gpu support if off and gpu promotion off', async
     showGPUPromotion: false,
     modelUploadDisabled: false,
     modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     apiPort: -1,
     appearance: 'dark',
@@ -128,6 +132,7 @@ test('click enable should call client', async () => {
     showGPUPromotion: true,
     modelUploadDisabled: false,
     modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     apiPort: -1,
     appearance: 'dark',
@@ -155,6 +160,7 @@ test('click hide should call client', async () => {
     showGPUPromotion: true,
     modelUploadDisabled: false,
     modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     apiPort: -1,
     appearance: 'dark',
diff --git a/packages/frontend/src/pages/CreateService.spec.ts b/packages/frontend/src/pages/CreateService.spec.ts
index 2c3b90959..6d6891e7e 100644
--- a/packages/frontend/src/pages/CreateService.spec.ts
+++ b/packages/frontend/src/pages/CreateService.spec.ts
@@ -117,6 +117,7 @@ beforeEach(() => {
   vi.mocked(studioClient.getExtensionConfiguration).mockResolvedValue({
     experimentalGPU: false,
     apiPort: 0,
+    inferenceRuntime: 'llama-cpp',
     experimentalTuning: false,
     modelsPath: '',
     modelUploadDisabled: false,
diff --git a/packages/frontend/src/pages/NewInstructLabSession.spec.ts b/packages/frontend/src/pages/NewInstructLabSession.spec.ts
index 4a82d5330..d0deee78b 100644
--- a/packages/frontend/src/pages/NewInstructLabSession.spec.ts
+++ b/packages/frontend/src/pages/NewInstructLabSession.spec.ts
@@ -45,6 +45,14 @@ vi.mock('../utils/client', async () => ({
   studioClient: {
     openURL: vi.fn(),
     openDialog: vi.fn(),
+    getExtensionConfiguration: vi.fn(),
+  },
+  rpcBrowser: {
+    subscribe: (): unknown => {
+      return {
+        unsubscribe: (): void => {},
+      };
+    },
   },
 }));
 
@@ -53,6 +61,16 @@ beforeEach(() => {
 
   const infos: Writable<ModelInfo[]> = writable([]);
   vi.mocked(modelsInfo).subscribe.mockImplementation(run => infos.subscribe(run));
+  vi.mocked(studioClient.getExtensionConfiguration).mockResolvedValue({
+    experimentalGPU: false,
+    apiPort: 0,
+    experimentalTuning: false,
+    modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
+    modelUploadDisabled: false,
+    showGPUPromotion: false,
+    appearance: 'dark',
+  });
 });
 
 test('empty form should have submit disabled', async () => {
diff --git a/packages/frontend/src/pages/PlaygroundCreate.spec.ts b/packages/frontend/src/pages/PlaygroundCreate.spec.ts
index b298be39a..b1e771a59 100644
--- a/packages/frontend/src/pages/PlaygroundCreate.spec.ts
+++ b/packages/frontend/src/pages/PlaygroundCreate.spec.ts
@@ -59,6 +59,7 @@ vi.mock('../utils/client', async () => {
   return {
     studioClient: {
       requestCreatePlayground: vi.fn(),
+      getExtensionConfiguration: vi.fn().mockResolvedValue({}),
     },
     rpcBrowser: {
       subscribe: (): unknown => {
diff --git a/packages/frontend/src/pages/Recipes.spec.ts b/packages/frontend/src/pages/Recipes.spec.ts
index e22d6b581..61aac6089 100644
--- a/packages/frontend/src/pages/Recipes.spec.ts
+++ b/packages/frontend/src/pages/Recipes.spec.ts
@@ -34,6 +34,14 @@ vi.mock('/@/stores/catalog', async () => {
 
 vi.mock('../utils/client', async () => ({
   studioClient: {
     filterRecipes: vi.fn(),
+    getExtensionConfiguration: vi.fn().mockResolvedValue({}),
+  },
+  rpcBrowser: {
+    subscribe: (): unknown => {
+      return {
+        unsubscribe: (): void => {},
+      };
+    },
   },
 }));
@@ -104,6 +112,16 @@ beforeEach(() => {
     filters: {},
     choices: {},
   });
+  vi.mocked(studioClient.getExtensionConfiguration).mockResolvedValue({
+    experimentalGPU: false,
+    apiPort: 0,
+    experimentalTuning: false,
+    modelsPath: '',
+    inferenceRuntime: 'llama-cpp',
+    modelUploadDisabled: false,
+    showGPUPromotion: false,
+    appearance: 'dark',
+  });
 });
 
 test('recipe without category should be visible', async () => {
diff --git a/packages/frontend/src/pages/Recipes.svelte b/packages/frontend/src/pages/Recipes.svelte
index 0592f42d6..2fb9da95e 100644
--- a/packages/frontend/src/pages/Recipes.svelte
+++ b/packages/frontend/src/pages/Recipes.svelte
@@ -8,6 +8,8 @@ import { Fa } from 'svelte-fa';
 import { faGithub } from '@fortawesome/free-brands-svg-icons'; // Import the GitHub icon
 import { studioClient } from '../utils/client';
 import type { CatalogFilterKey, Choice, RecipeChoices, RecipeFilters } from '@shared/models/FilterRecipesResult';
+import { onMount } from 'svelte';
+import { configuration } from '../stores/extensionConfiguration';
 
 // filters available in the dropdowns for the user to select
 let choices: RecipeChoices = $state({});
@@ -100,6 +102,14 @@ const filtersComponents: { label: string; key: CatalogFilterKey }[] = [
 function openContribution(): void {
   studioClient.openURL('https://github.com/containers/ai-lab-recipes/blob/main/CONTRIBUTING.md').catch(console.error);
 }
+
+let defaultRuntime: string | undefined = $state();
+
+onMount(() => {
+  const inferenceRuntime = $configuration?.inferenceRuntime;
+  if (inferenceRuntime) defaultRuntime = inferenceRuntime;
+  if (inferenceRuntime !== 'all') onFilterChange('tools', defaultRuntime ?? '');
+});
 
 </script>
@@ -134,6 +144,7 @@ function openContribution(): void {
           onFilterChange(filterComponent.key, v)}>
diff --git a/packages/frontend/src/pages/instructlab/AboutInstructLab.spec.ts b/packages/frontend/src/pages/instructlab/AboutInstructLab.spec.ts
index ba4d72008..d8363cc3c 100644
--- a/packages/frontend/src/pages/instructlab/AboutInstructLab.spec.ts
+++ b/packages/frontend/src/pages/instructlab/AboutInstructLab.spec.ts
@@ -59,6 +59,7 @@ const mockConfiguration: Writable<ExtensionConfiguration> = writable({
   modelUploadDisabled: false,
   experimentalTuning: false,
   showGPUPromotion: false,
+  inferenceRuntime: 'llama-cpp',
   appearance: 'dark',
 });
 
@@ -75,6 +76,7 @@ test('renders Start Fine Tuning button if experimentalTuning is true', async ()
     modelUploadDisabled: false,
     modelsPath: '',
     experimentalTuning: true,
+    inferenceRuntime: 'llama-cpp',
     apiPort: -1,
     appearance: 'dark',
   });
@@ -89,6 +91,7 @@ test('does not render Start Fine Tuning button if experimentalTuning is false',
     modelUploadDisabled: false,
     modelsPath: '',
     experimentalTuning: false,
+    inferenceRuntime: 'llama-cpp',
     apiPort: -1,
     appearance: 'dark',
   });
@@ -103,6 +106,7 @@ test('navigates to /tune/start when Start Fine Tuning is clicked', async () => {
     modelUploadDisabled: false,
     modelsPath: '',
     experimentalTuning: true,
+    inferenceRuntime: 'llama-cpp',
     apiPort: -1,
     appearance: 'dark',
   });
diff --git a/packages/frontend/src/pages/server-information/LocalServer.spec.ts b/packages/frontend/src/pages/server-information/LocalServer.spec.ts
index 6468cffdf..f3ec8db90 100644
--- a/packages/frontend/src/pages/server-information/LocalServer.spec.ts
+++ b/packages/frontend/src/pages/server-information/LocalServer.spec.ts
@@ -47,6 +47,7 @@ const mockConfiguration: Writable<ExtensionConfiguration> = writable({
   experimentalGPU: false,
   modelsPath: '',
   apiPort: 10434,
+  inferenceRuntime: 'llama-cpp',
   modelUploadDisabled: false,
   experimentalTuning: false,
   showGPUPromotion: false,
diff --git a/packages/shared/src/models/IExtensionConfiguration.ts b/packages/shared/src/models/IExtensionConfiguration.ts
index 0a13505b3..9d3ea6b29 100644
--- a/packages/shared/src/models/IExtensionConfiguration.ts
+++ b/packages/shared/src/models/IExtensionConfiguration.ts
@@ -20,6 +20,7 @@ export interface ExtensionConfiguration {
   experimentalGPU: boolean;
   modelsPath: string;
   apiPort: number;
+  inferenceRuntime: string;
   experimentalTuning: boolean;
   modelUploadDisabled: boolean;
   showGPUPromotion: boolean;
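
Taken together, the patch threads one new setting through the stack: `package.json` declares the `ai-lab.inferenceRuntime` enum, `ConfigurationRegistry` reads it with `'all'` as the fallback and exposes it on the shared `ExtensionConfiguration` interface, and `Recipes.svelte` consumes it on mount to pre-select the `tools` recipe filter. Below is a minimal sketch of that consumer-side mapping; `initialToolFilter` is a hypothetical helper name for illustration, not an API introduced by the patch.

```ts
// The four values accepted by the ai-lab.inferenceRuntime setting above.
type InferenceRuntime = 'all' | 'llama-cpp' | 'whisper-cpp' | 'none';

// Hypothetical helper: derives the initial 'tools' filter from the setting,
// assuming 'all' (the ConfigurationRegistry fallback) means "do not pre-filter".
function initialToolFilter(runtime: InferenceRuntime | undefined): string | undefined {
  if (!runtime || runtime === 'all') return undefined; // leave the filter unset
  return runtime; // e.g. 'llama-cpp' pre-selects llama-cpp recipes
}

// initialToolFilter('llama-cpp') === 'llama-cpp'
// initialToolFilter('all') === undefined
// initialToolFilter(undefined) === undefined
```

One difference from the `onMount` handler above: when `$configuration` has not resolved yet, the handler calls `onFilterChange('tools', '')` (because `undefined !== 'all'`), whereas the sketch skips filtering entirely; that is a deliberate simplification, not the patch's behavior.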