10 changes: 10 additions & 0 deletions packages/backend/package.json
@@ -51,6 +51,16 @@
"maximum": 65535,
"description": "Port on which the API is listening (requires restart of extension)"
},
"ai-lab.inferenceRuntime": {
"type": "string",
"enum": [
"all",
"llama-cpp",
"whisper-cpp",
"none"
],
"description": "Choose the default inferencing runtime for AI Lab"
},
"ai-lab.experimentalTuning": {
"type": "boolean",
"default": false,
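For orientation: the setting lives in the `ai-lab` configuration section, so extension code can read it through the standard Podman Desktop configuration API. A minimal sketch (the accessor shape is the usual `@podman-desktop/api` one; the `'all'` fallback is an assumption that mirrors the registry default added later in this diff):

```ts
import { configuration } from '@podman-desktop/api';

// Read the preferred inference runtime; note package.json declares the enum
// but no default, so the caller supplies the fallback ('all', matching the
// ConfigurationRegistry below).
const runtime: string =
  configuration.getConfiguration('ai-lab').get<string>('inferenceRuntime') ?? 'all';
```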
2 changes: 2 additions & 0 deletions packages/backend/src/managers/modelsManager.spec.ts
@@ -141,6 +141,7 @@ beforeEach(() => {
modelsPath: '~/downloads',
experimentalTuning: false,
apiPort: 0,
inferenceRuntime: 'llama-cpp',
experimentalGPU: false,
showGPUPromotion: false,
appearance: 'dark',
@@ -1007,6 +1008,7 @@ describe('uploadModelToPodmanMachine', () => {
modelsPath: '~/downloads',
experimentalTuning: false,
apiPort: 0,
inferenceRuntime: 'llama-cpp',
experimentalGPU: false,
showGPUPromotion: false,
appearance: 'dark',
2 changes: 2 additions & 0 deletions packages/backend/src/registries/ConfigurationRegistry.ts
@@ -26,6 +26,7 @@ const CONFIGURATION_SECTIONS: string[] = [
'models.path',
'experimentalGPU',
'apiPort',
'inferenceRuntime',
'experimentalTuning',
'modelUploadDisabled',
'showGPUPromotion',
@@ -54,6 +55,7 @@ export class ConfigurationRegistry extends Publisher<ExtensionConfiguration> imp
modelsPath: this.getModelsPath(),
experimentalGPU: this.#configuration.get<boolean>('experimentalGPU') ?? false,
apiPort: this.#configuration.get<number>('apiPort') ?? API_PORT_DEFAULT,
inferenceRuntime: this.#configuration.get<string>('inferenceRuntime') ?? 'all',
experimentalTuning: this.#configuration.get<boolean>('experimentalTuning') ?? false,
modelUploadDisabled: this.#configuration.get<boolean>('modelUploadDisabled') ?? false,
showGPUPromotion: this.#configuration.get<boolean>('showGPUPromotion') ?? true,
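One asymmetry worth noting while reading the specs that follow: the registry falls back to `'all'` when the setting is unset, while every test mock pins `inferenceRuntime: 'llama-cpp'`. Since the shared model types the field as a plain `string` (see `IExtensionConfiguration.ts` at the bottom of the diff), a union mirroring the package.json enum could catch typos at compile time (a hypothetical hardening, not part of this PR):

```ts
// Hypothetical: narrow the field from string to the enum declared in package.json.
export type InferenceRuntime = 'all' | 'llama-cpp' | 'whisper-cpp' | 'none';

// Coerce an arbitrary configuration value to a known runtime, reusing the
// same 'all' fallback the registry applies.
export function asInferenceRuntime(value: string | undefined): InferenceRuntime {
  switch (value) {
    case 'llama-cpp':
    case 'whisper-cpp':
    case 'none':
      return value;
    default:
      return 'all';
  }
}
```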
@@ -97,6 +97,7 @@ beforeEach(() => {
experimentalGPU: false,
modelsPath: 'model-path',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -278,6 +279,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -321,6 +323,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -369,6 +372,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -417,6 +421,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -453,6 +458,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -498,6 +504,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -537,6 +544,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -581,6 +589,7 @@ describe('perform', () => {
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
1 change: 1 addition & 0 deletions packages/backend/src/workers/provider/OpenVINO.spec.ts
@@ -95,6 +95,7 @@ beforeEach(() => {
experimentalGPU: false,
modelsPath: 'model-path',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
@@ -68,6 +68,7 @@ beforeEach(() => {
apiPort: 0,
experimentalTuning: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
modelUploadDisabled: false,
showGPUPromotion: false,
appearance: 'dark',
6 changes: 6 additions & 0 deletions packages/frontend/src/lib/notification/GPUPromotion.spec.ts
@@ -45,6 +45,7 @@ const mockConfiguration: Writable<ExtensionConfiguration> = writable({
experimentalGPU: false,
modelsPath: '',
apiPort: -1,
inferenceRuntime: 'llama-cpp',
modelUploadDisabled: false,
experimentalTuning: false,
showGPUPromotion: false,
@@ -64,6 +65,7 @@ test('should show banner if gpu support if off and gpu promotion on', async () =
showGPUPromotion: true,
modelUploadDisabled: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
apiPort: -1,
appearance: 'dark',
@@ -85,6 +87,7 @@ test('should not show banner if gpu support if on and gpu promotion on', async (
showGPUPromotion: true,
modelUploadDisabled: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
apiPort: -1,
appearance: 'dark',
@@ -106,6 +109,7 @@ test('should not show banner if gpu support if off and gpu promotion off', async
showGPUPromotion: false,
modelUploadDisabled: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
apiPort: -1,
appearance: 'dark',
@@ -128,6 +132,7 @@ test('click enable should call client', async () => {
showGPUPromotion: true,
modelUploadDisabled: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
apiPort: -1,
appearance: 'dark',
@@ -155,6 +160,7 @@ test('click hide should call client', async () => {
showGPUPromotion: true,
modelUploadDisabled: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
apiPort: -1,
appearance: 'dark',
1 change: 1 addition & 0 deletions packages/frontend/src/pages/CreateService.spec.ts
@@ -117,6 +117,7 @@ beforeEach(() => {
vi.mocked(studioClient.getExtensionConfiguration).mockResolvedValue({
experimentalGPU: false,
apiPort: 0,
inferenceRuntime: 'llama-cpp',
experimentalTuning: false,
modelsPath: '',
modelUploadDisabled: false,
18 changes: 18 additions & 0 deletions packages/frontend/src/pages/NewInstructLabSession.spec.ts
@@ -45,6 +45,14 @@ vi.mock('../utils/client', async () => ({
studioClient: {
openURL: vi.fn(),
openDialog: vi.fn(),
getExtensionConfiguration: vi.fn(),
},
rpcBrowser: {
subscribe: (): unknown => {
return {
unsubscribe: (): void => {},
};
},
},
}));

@@ -53,6 +61,16 @@ beforeEach(() => {

const infos: Writable<ModelInfo[]> = writable([]);
vi.mocked(modelsInfo).subscribe.mockImplementation(run => infos.subscribe(run));
vi.mocked(studioClient.getExtensionConfiguration).mockResolvedValue({
experimentalGPU: false,
apiPort: 0,
experimentalTuning: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
modelUploadDisabled: false,
showGPUPromotion: false,
appearance: 'dark',
});
});

test('empty form should have submit disabled', async () => {
1 change: 1 addition & 0 deletions packages/frontend/src/pages/PlaygroundCreate.spec.ts
@@ -59,6 +59,7 @@ vi.mock('../utils/client', async () => {
return {
studioClient: {
requestCreatePlayground: vi.fn(),
getExtensionConfiguration: vi.fn().mockResolvedValue({}),
},
rpcBrowser: {
subscribe: (): unknown => {
18 changes: 18 additions & 0 deletions packages/frontend/src/pages/Recipes.spec.ts
@@ -34,6 +34,14 @@ vi.mock('/@/stores/catalog', async () => {
vi.mock('../utils/client', async () => ({
studioClient: {
filterRecipes: vi.fn(),
getExtensionConfiguration: vi.fn().mockResolvedValue({}),
},
rpcBrowser: {
subscribe: (): unknown => {
return {
unsubscribe: (): void => {},
};
},
},
}));

@@ -104,6 +112,16 @@ beforeEach(() => {
filters: {},
choices: {},
});
vi.mocked(studioClient.getExtensionConfiguration).mockResolvedValue({
experimentalGPU: false,
apiPort: 0,
experimentalTuning: false,
modelsPath: '',
inferenceRuntime: 'llama-cpp',
modelUploadDisabled: false,
showGPUPromotion: false,
appearance: 'dark',
});
});

test('recipe without category should be visible', async () => {
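NewInstructLabSession.spec.ts and Recipes.spec.ts add the same two mock members: a `getExtensionConfiguration` stub and an `rpcBrowser.subscribe` stub returning a no-op unsubscriber. The contract the pages appear to depend on is roughly the following, inferred from the stubs above rather than from the real `rpcBrowser` source:

```ts
// Assumed minimal contract, reconstructed from the test stubs.
interface Unsubscriber {
  unsubscribe(): void;
}

interface RpcBrowserLike {
  // Pages subscribe to pushed updates (such as configuration changes)
  // and keep the handle so they can unsubscribe on teardown.
  subscribe(...args: unknown[]): Unsubscriber;
}

// The shape both specs install:
const rpcBrowserStub: RpcBrowserLike = {
  subscribe: (): Unsubscriber => ({ unsubscribe: (): void => {} }),
};
```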
11 changes: 11 additions & 0 deletions packages/frontend/src/pages/Recipes.svelte
@@ -8,6 +8,8 @@ import { Fa } from 'svelte-fa';
import { faGithub } from '@fortawesome/free-brands-svg-icons'; // Import the GitHub icon
import { studioClient } from '../utils/client';
import type { CatalogFilterKey, Choice, RecipeChoices, RecipeFilters } from '@shared/models/FilterRecipesResult';
import { onMount } from 'svelte';
import { configuration } from '../stores/extensionConfiguration';

// filters available in the dropdowns for the user to select
let choices: RecipeChoices = $state({});
@@ -100,6 +102,14 @@ const filtersComponents: { label: string; key: CatalogFilterKey }[] = [
function openContribution(): void {
studioClient.openURL('https://github.com/containers/ai-lab-recipes/blob/main/CONTRIBUTING.md').catch(console.error);
}

let defaultRuntime: string | undefined = $state();

onMount(() => {
const inferenceRuntime = $configuration?.inferenceRuntime;
if (inferenceRuntime) defaultRuntime = inferenceRuntime;
if (inferenceRuntime !== 'all') onFilterChange('tools', defaultRuntime ?? '');
});
</script>

<NavPage title="Recipe Catalog" searchEnabled={false}>
@@ -134,6 +144,7 @@ function openContribution(): void {
<label for={filterComponent.key} class="block mb-2 text-sm font-medium">{filterComponent.label}</label>
<Dropdown
id={filterComponent.key}
value={filterComponent.key === 'tools' ? defaultRuntime : ''}
options={choicesToOptions(choices[filterComponent.key])}
onChange={(v): void => onFilterChange(filterComponent.key, v)}></Dropdown>
</div>
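The `onMount` hook above seeds the catalog's `tools` filter from the configured runtime: `'all'` (the registry default) means no pre-filtering, while a concrete runtime such as `'llama-cpp'` pre-selects the dropdown. Distilled into plain TypeScript as a sketch of the branching (names taken from the component, not a literal extract):

```ts
// Sketch: the value onMount feeds to onFilterChange('tools', ...).
// Returns undefined when no filter should be applied at all.
function initialToolsFilter(inferenceRuntime: string | undefined): string | undefined {
  if (inferenceRuntime === 'all') return undefined; // explicit opt-out of pre-filtering
  // A concrete runtime seeds the filter. Note the component falls back to ''
  // when the configuration store has not resolved yet, because
  // `undefined !== 'all'` also takes this branch.
  return inferenceRuntime ?? '';
}
```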
@@ -59,6 +59,7 @@ const mockConfiguration: Writable<ExtensionConfiguration> = writable({
modelUploadDisabled: false,
experimentalTuning: false,
showGPUPromotion: false,
inferenceRuntime: 'llama-cpp',
appearance: 'dark',
});

@@ -75,6 +76,7 @@ test('renders Start Fine Tuning button if experimentalTuning is true', async ()
modelUploadDisabled: false,
modelsPath: '',
experimentalTuning: true,
inferenceRuntime: 'llama-cpp',
apiPort: -1,
appearance: 'dark',
});
@@ -89,6 +91,7 @@ test('does not render Start Fine Tuning button if experimentalTuning is false',
modelUploadDisabled: false,
modelsPath: '',
experimentalTuning: false,
inferenceRuntime: 'llama-cpp',
apiPort: -1,
appearance: 'dark',
});
@@ -103,6 +106,7 @@ test('navigates to /tune/start when Start Fine Tuning is clicked', async () => {
modelUploadDisabled: false,
modelsPath: '',
experimentalTuning: true,
inferenceRuntime: 'llama-cpp',
apiPort: -1,
appearance: 'dark',
});
@@ -47,6 +47,7 @@ const mockConfiguration: Writable<ExtensionConfiguration> = writable({
experimentalGPU: false,
modelsPath: '',
apiPort: 10434,
inferenceRuntime: 'llama-cpp',
modelUploadDisabled: false,
experimentalTuning: false,
showGPUPromotion: false,
1 change: 1 addition & 0 deletions packages/shared/src/models/IExtensionConfiguration.ts
@@ -20,6 +20,7 @@ export interface ExtensionConfiguration {
experimentalGPU: boolean;
modelsPath: string;
apiPort: number;
inferenceRuntime: string;
experimentalTuning: boolean;
modelUploadDisabled: boolean;
showGPUPromotion: boolean;
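A closing observation on the test churn above: nearly every spec repeats the same eight-field `ExtensionConfiguration` literal just to add `inferenceRuntime: 'llama-cpp'`. A shared factory with overrides (a hypothetical refactor, not part of this change) would confine the next field addition to a single file:

```ts
import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration';

// Hypothetical test helper; defaults drawn from the mocks in this PR.
export function mockExtensionConfiguration(
  overrides: Partial<ExtensionConfiguration> = {},
): ExtensionConfiguration {
  return {
    experimentalGPU: false,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
    ...overrides,
  };
}
```

Specs would then read `mockExtensionConfiguration({ experimentalGPU: true })` instead of restating every field.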