Commit 885fce5

authored by axel7083
fix: adding chatformat to use for inference servers (#868)
* fix: adding chatformat to use for inference servers
* test: ensuring chatformat propagate the env
* refactor: move chat format to properties section
* fix: using camelCase for json properties
* fix: json properties
* fix: adding MODEL_ prefix
* fix: linter&prettier
* chore: bump container image version
* fix: prettier

Signed-off-by: axel7083 <[email protected]>
1 parent 7b96eb9 commit 885fce5

File tree (5 files changed: +92 -6 lines changed)

* packages/backend/src/assets/ai.json
* packages/backend/src/managers/inference/inferenceManager.spec.ts
* packages/backend/src/utils/inferenceUtils.spec.ts
* packages/backend/src/utils/inferenceUtils.ts
* packages/shared/src/models/IModelInfo.ts

packages/backend/src/assets/ai.json

Lines changed: 12 additions & 3 deletions
@@ -104,7 +104,10 @@
       "registry": "Hugging Face",
       "license": "Apache-2.0",
       "url": "https://huggingface.co/ibm/merlinite-7b-GGUF/resolve/main/merlinite-7b-Q4_K_M.gguf",
-      "memory": 4370129224
+      "memory": 4370129224,
+      "properties": {
+        "chatFormat": "openchat"
+      }
     },
     {
       "id": "hf.TheBloke.mistral-7b-codealpaca-lora.Q4_K_M",
@@ -134,7 +137,10 @@
       "registry": "Hugging Face",
       "license": "Apache-2.0",
       "url": "https://huggingface.co/froggeric/Cerebrum-1.0-7b-GGUF/resolve/main/Cerebrum-1.0-7b-Q4_KS.gguf",
-      "memory": 4144643441
+      "memory": 4144643441,
+      "properties": {
+        "chatFormat": "openchat"
+      }
     },
     {
       "id": "hf.TheBloke.openchat-3.5-0106.Q4_K_M",
@@ -174,7 +180,10 @@
       "registry": "Hugging Face",
       "license": "Apache-2.0",
       "url": "https://huggingface.co/llmware/dragon-mistral-7b-v0/resolve/main/dragon-mistral-7b-q4_k_m.gguf",
-      "memory": 4370129224
+      "memory": 4370129224,
+      "properties": {
+        "chatFormat": "openchat"
+      }
     },
     {
       "id": "hf.MaziyarPanahi.MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M",

packages/backend/src/managers/inference/inferenceManager.spec.ts

Lines changed: 3 additions & 1 deletion
@@ -309,7 +309,9 @@ describe('Create Inference Server', () => {
     );
     expect(taskRegistryMock.createTask).toHaveBeenNthCalledWith(
       1,
-      'Pulling ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.2.0.',
+      expect.stringContaining(
+        'Pulling ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:',
+      ),
       'loading',
       {
         trackingId: 'dummyTrackingId',
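The assertion is loosened because this same commit bumps the playground image tag (0.2.0 to 0.3.0 in inferenceUtils.ts below): matching on the tag-less image prefix keeps the test green across future version bumps. A minimal illustration of the asymmetric matcher, with a hypothetical literal on the left-hand side:

// expect.stringContaining passes for any tag suffix, so the bumped tag still matches.
expect(
  'Pulling ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.3.0.',
).toEqual(
  expect.stringContaining(
    'Pulling ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:',
  ),
);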

packages/backend/src/utils/inferenceUtils.spec.ts

Lines changed: 62 additions & 0 deletions
@@ -96,6 +96,68 @@ describe('generateContainerCreateOptions', () => {
       },
     });
   });
+
+  test('model info with chat_format properties', () => {
+    const result = generateContainerCreateOptions(
+      {
+        port: 8888,
+        providerId: 'test@providerId',
+        image: INFERENCE_SERVER_IMAGE,
+        modelsInfo: [
+          {
+            id: 'dummyModelId',
+            file: {
+              file: 'dummyFile',
+              path: 'dummyPath',
+            },
+            properties: {
+              chatFormat: 'dummyChatFormat',
+            },
+          },
+        ],
+      } as unknown as InferenceServerConfig,
+      {
+        Id: 'dummyImageId',
+        engineId: 'dummyEngineId',
+        RepoTags: [INFERENCE_SERVER_IMAGE],
+      } as unknown as ImageInfo,
+    );
+
+    expect(result.Env).toContain('MODEL_CHAT_FORMAT=dummyChatFormat');
+  });
+
+  test('model info with multiple properties', () => {
+    const result = generateContainerCreateOptions(
+      {
+        port: 8888,
+        providerId: 'test@providerId',
+        image: INFERENCE_SERVER_IMAGE,
+        modelsInfo: [
+          {
+            id: 'dummyModelId',
+            file: {
+              file: 'dummyFile',
+              path: 'dummyPath',
+            },
+            properties: {
+              basicProp: 'basicProp',
+              lotOfCamelCases: 'lotOfCamelCases',
+              lowercase: 'lowercase',
+            },
+          },
+        ],
+      } as unknown as InferenceServerConfig,
+      {
+        Id: 'dummyImageId',
+        engineId: 'dummyEngineId',
+        RepoTags: [INFERENCE_SERVER_IMAGE],
+      } as unknown as ImageInfo,
+    );
+
+    expect(result.Env).toContain('MODEL_BASIC_PROP=basicProp');
+    expect(result.Env).toContain('MODEL_LOT_OF_CAMEL_CASES=lotOfCamelCases');
+    expect(result.Env).toContain('MODEL_LOWERCASE=lowercase');
+  });
 });

 describe('withDefaultConfiguration', () => {

packages/backend/src/utils/inferenceUtils.ts

Lines changed: 12 additions & 2 deletions
@@ -34,7 +34,7 @@ export const SECOND: number = 1_000_000_000;
 export const LABEL_INFERENCE_SERVER: string = 'ai-lab-inference-server';

 export const INFERENCE_SERVER_IMAGE =
-  'ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.2.0';
+  'ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.3.0';

 /**
  * Return container connection provider
@@ -115,6 +115,16 @@ export function generateContainerCreateOptions(
     throw new Error('The model info file provided is undefined');
   }

+  const envs: string[] = [`MODEL_PATH=/models/${modelInfo.file.file}`, 'HOST=0.0.0.0', 'PORT=8000'];
+  if (modelInfo.properties) {
+    envs.push(
+      ...Object.entries(modelInfo.properties).map(([key, value]) => {
+        const formattedKey = key.replace(/[A-Z]/g, m => `_${m}`).toUpperCase();
+        return `MODEL_${formattedKey}=${value}`;
+      }),
+    );
+  }
+
   return {
     Image: imageInfo.Id,
     Detach: true,
@@ -147,7 +157,7 @@
       ...config.labels,
       [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)),
     },
-    Env: [`MODEL_PATH=/models/${modelInfo.file.file}`, 'HOST=0.0.0.0', 'PORT=8000'],
+    Env: envs,
     Cmd: ['--models-path', '/models', '--context-size', '700', '--threads', '4'],
   };
 }
packages/shared/src/models/IModelInfo.ts

Lines changed: 3 additions & 0 deletions
@@ -29,6 +29,9 @@ export interface ModelInfo {
   file?: LocalModelInfo;
   state?: 'deleting';
   memory?: number;
+  properties?: {
+    [key: string]: string;
+  };
 }

 export type ModelCheckerContext = 'inference' | 'recipe';
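The new field is an optional string-to-string index signature, so the catalog can attach arbitrary per-model settings without further interface changes. A minimal usage sketch (the id is reused from the tests above, and the cast mirrors the `as unknown as` style those tests use to elide the interface's other fields):

const model = {
  id: 'dummyModelId',
  properties: {
    chatFormat: 'openchat', // any string key/value pair is accepted by the index signature
  },
} as unknown as ModelInfo;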
