
Commit 1723881

[openvino-langchain] Add ChatOpenVINO (#983)
Signed-off-by: Kirill Suvorov <[email protected]>
Co-authored-by: Alicja Miloszewska <[email protected]>
1 parent 2ede306 commit 1723881

8 files changed (+506 −14 lines)

modules/openvino-langchain/README.md

Lines changed: 36 additions & 4 deletions
@@ -64,9 +64,9 @@ optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weigh
 
 ## LLM
 
-This package contains the `GenAI` class, which is the recommended way to interact with models optimized for the OpenVINO toolkit.
+This package contains the `OpenVINO` class, which is the recommended way to interact with models optimized for the OpenVINO toolkit.
 
-**GenAI Parameters**
+**OpenVINO Parameters**
 
 | Name | Type | Required | Description |
 | ----- | ---- |--------- | ----------- |
@@ -75,9 +75,9 @@ This package contains the `GenAI` class, which is the recommended way to interac
 | generationConfig | [GenerationConfig](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/js/lib/utils.ts#L107-L110) || Structure to keep generation config parameters. |
 
 ```typescript
-import { GenAI } from "openvino-langchain";
+import { OpenVINO } from "openvino-langchain";
 
-const model = new GenAI({
+const model = new OpenVINO({
   modelPath: "path-to-model",
   device: "CPU",
   generationConfig: {
@@ -87,6 +87,38 @@ const model = new GenAI({
 const response = await model.invoke("Hello, world!");
 ```
 
+## ChatModel
+
+This package contains the `ChatOpenVINO` class, which allows using OpenVINO in chat pipelines.
+
+**ChatOpenVINO Parameters**
+
+| Name | Type | Required | Description |
+| ----- | ---- |--------- | ----------- |
+| modelPath | string || Path to the directory containing model xml/bin files and tokenizer |
+| device | string || Device to run the model on (e.g., CPU, GPU). |
+| generationConfig | [GenerationConfig](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/js/lib/utils.ts#L107-L110) || Structure to keep generation config parameters. |
+
+```js
+import { ChatOpenVINO } from "openvino-langchain";
+import { HumanMessage, SystemMessage } from '@langchain/core/messages';
+
+const model = new ChatOpenVINO({
+  modelPath: "path-to-model",
+  device: "CPU",
+  generationConfig: {
+    "max_new_tokens": 100,
+  },
+});
+
+const messages = [
+  new SystemMessage('Translate the following from English into German'),
+  new HumanMessage('Thank you!'),
+];
+const response = await model.invoke(messages);
+console.log(response.content);
+```
+
 ## Text Embedding Model
 
 This package also adds support for OpenVINO's embeddings model.
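The embeddings model is only mentioned in passing here; `OpenVINOEmbeddings` is imported from the package in `rag_sample.js` further down. Below is a minimal sketch of how it could be used, assuming its constructor takes the same `modelPath`/`device` options as the other classes (that shape is an assumption, not shown in this diff) and that it implements LangChain's standard `embedQuery`/`embedDocuments` interface:

```typescript
import { OpenVINOEmbeddings } from "openvino-langchain";

// Assumption: the constructor mirrors OpenVINO/ChatOpenVINO (modelPath + device).
const embeddings = new OpenVINOEmbeddings({
  modelPath: "path-to-embeddings-model", // e.g. an exported BAAI/bge-small-en-v1.5
  device: "CPU",
});

// embedQuery/embedDocuments are the standard LangChain.js Embeddings methods.
const queryVector = await embeddings.embedQuery("What is OpenVINO?");
const docVectors = await embeddings.embedDocuments(["first document", "second document"]);
console.log(queryVector.length, docVectors.length);
```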
Lines changed: 43 additions & 9 deletions
@@ -1,21 +1,55 @@
-## How to run sample?
+# OpenVINO™ LangChain.js adapter samples
 
-First download a sample model. You can use Optimum Intel [tool](https://github.com/huggingface/optimum-intel):
+## Download and convert the model and tokenizers
+
+You need to convert and compress the text generation model into the [OpenVINO IR format](https://docs.openvino.ai/2025/documentation/openvino-ir-format.html).
+Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#large-language-models-llms) for more details.
+
+### Option 1. Convert a model with Optimum Intel
+
+First install [Optimum Intel](https://github.com/huggingface/optimum-intel) and then run the export with Optimum CLI:
 
 ```bash
-optimum-cli export openvino --trust-remote-code --model microsoft/Phi-3.5-mini-instruct Phi-3.5-mini-instruct
+optimum-cli export openvino --model <model> <output_folder>
 ```
 
-Alternatively, you can clone the repository:
+### Option 2. Download a converted model
 
-```bash
-git clone https://huggingface.co/OpenVINO/Phi-3.5-mini-instruct-fp16-ov
+If a converted model in OpenVINO IR format is already available in the collection of [OpenVINO optimized LLMs](https://huggingface.co/collections/OpenVINO/llm-6687aaa2abca3bbcec71a9bd) on Hugging Face, it can be downloaded directly via [huggingface-cli](https://huggingface.co/docs/huggingface_hub/en/guides/cli).
+
+```sh
+huggingface-cli download <model> --local-dir <output_folder>
 ```
 
-Then navigate to the `openvino-langchain/sample` directory and run the sample:
+## Install NPM dependencies
+
+Run the following command from the current directory:
 
 ```bash
-cd sample/
 npm install
-node index.js *path_to_llm_model_dir* *path_to_embeddings_model_dir*
 ```
+
+## Sample Descriptions
+
+### 1. Chat Sample (`chat_sample`)
+- **Description:** Interactive chat interface powered by OpenVINO.
+- **Recommended Models:**
+  - `meta-llama/Llama-2-7b-chat-hf`
+  - `TinyLlama/TinyLlama-1.1B-Chat-v1.0`
+- **Main Feature:** Real-time chat-like text generation.
+- **Run Command:**
+  ```bash
+  node chat_sample.js <model_dir>
+  ```
+
+### 2. RAG Sample (`rag_sample`)
+- **Description:** This sample retrieves relevant documents from a simple [knowledge base](./data/document_sample.txt) using a retriever model
+and generates a response using a generative model, conditioned on both the user query and the retrieved documents.
+- **Recommended Models:**
+  - **LLM:** `meta-llama/Llama-2-7b-chat-hf`
+  - **Embedding:** `BAAI/bge-small-en-v1.5`
+- **Main Feature:** RAG pipeline implementation.
+- **Run Command:**
+  ```bash
+  node rag_sample.js <llm_dir> <embedding_model_dir>
+  ```
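To make the RAG sample's flow concrete, here is a hedged sketch of the wiring it describes, built from the imports visible in the `rag_sample.js` diff below (`TextLoader`, `OpenVINO`, `OpenVINOEmbeddings`). The in-memory vector store, the embeddings constructor options, and the prompt assembly are illustrative assumptions rather than the sample's exact code:

```typescript
import { TextLoader } from "langchain/document_loaders/fs/text";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { OpenVINO, OpenVINOEmbeddings } from "openvino-langchain";

// Paths mirror the sample's CLI arguments: node rag_sample.js <llm_dir> <embedding_model_dir>
const LLM_MODEL_PATH = process.argv[2];
const EMBEDDINGS_MODEL_PATH = process.argv[3];

// 1. Load the knowledge base (the sample ships ./data/document_sample.txt).
const docs = await new TextLoader("./data/document_sample.txt").load();

// 2. Embed the documents with the OpenVINO embeddings model and index them in memory.
//    Assumption: OpenVINOEmbeddings takes modelPath/device like the other classes.
const vectorStore = await MemoryVectorStore.fromDocuments(
  docs,
  new OpenVINOEmbeddings({ modelPath: EMBEDDINGS_MODEL_PATH, device: "CPU" }),
);

// 3. Retrieve the documents most similar to the user query.
const query = "What does the document say about OpenVINO?";
const retrieved = await vectorStore.similaritySearch(query, 2);
const context = retrieved.map((d) => d.pageContent).join("\n\n");

// 4. Condition the OpenVINO LLM on both the query and the retrieved context.
const llm = new OpenVINO({
  modelPath: LLM_MODEL_PATH,
  device: "CPU",
  generationConfig: { "max_new_tokens": 100 },
});
const answer = await llm.invoke(`Context:\n${context}\n\nQuestion: ${query}`);
console.log(answer);
```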
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
import { ChatOpenVINO } from 'openvino-langchain';
import { basename } from 'node:path';
import readline from 'readline';

const LLM_MODEL_PATH = process.argv[2];

if (!LLM_MODEL_PATH) {
  console.error('Please specify path to the LLM model directory\n'
    + 'Run command must be:\n'
    + `'node ${basename(process.argv[1])} *path_to_llm_model_dir*'`);
  process.exit(1);
}
if (process.argv.length > 3) {
  console.error(
    `Run command must be:
    'node ${basename(process.argv[1])} *path_to_llm_model_dir*'`,
  );
  process.exit(1);
}

async function main() {
  const MODEL_PATH = process.argv[2];

  if (process.argv.length > 3) {
    console.error(
      `Run command must be:
      'node ${basename(process.argv[1])} *path_to_model_dir*'`,
    );
    process.exit(1);
  }
  if (!MODEL_PATH) {
    console.error('Please specify path to model directory\n'
      + `Run command must be:
      'node ${basename(process.argv[1])} *path_to_model_dir*'`);
    process.exit(1);
  }

  const device = 'CPU'; // GPU can be used as well
  const config = { 'max_new_tokens': 100 };
  const chat = new ChatOpenVINO({
    modelPath: LLM_MODEL_PATH,
    device,
    generationConfig: config,
  });

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const messages = [
    new SystemMessage('You are chatbot.'),
  ];

  promptUser();

  // Function to prompt the user for input
  function promptUser() {
    rl.question('question:\n', handleInput);
  }

  // Function to handle user input
  async function handleInput(input) {
    input = input.trim();

    // An empty input ends the chat session
    if (!input) {
      rl.close();
      process.exit(0);
    }

    messages.push(new HumanMessage(input));
    const aiResponse = await chat.invoke(messages);

    messages.push(aiResponse);
    console.log(aiResponse.text);
    console.log('\n----------');

    promptUser();
  }
}

main();

modules/openvino-langchain/sample/index.js renamed to modules/openvino-langchain/sample/rag_sample.js

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ import { TextLoader } from 'langchain/document_loaders/fs/text';
 import { OpenVINO, OpenVINOEmbeddings } from 'openvino-langchain';
 
 // Paths to document and models
-const TEXT_DOCUMENT_PATH = './document_sample.txt';
+const TEXT_DOCUMENT_PATH = './data/document_sample.txt';
 const LLM_MODEL_PATH = process.argv[2];
 const EMBEDDINGS_MODEL_PATH = process.argv[3];

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
import {
  BaseLanguageModelCallOptions,
} from '@langchain/core/language_models/base';
import {
  SimpleChatModel,
} from '@langchain/core/language_models/chat_models';
import { AIMessageChunk, BaseMessage } from '@langchain/core/messages';
import { ChatGenerationChunk } from '@langchain/core/outputs';
import {
  GenerationConfig,
  LLMPipeline,
  StreamingStatus,
} from 'openvino-genai-node';

export interface ChatOpenVINOParams extends BaseLanguageModelCallOptions {
  generationConfig?: GenerationConfig,
  modelPath: string,
  device?: string,
}

export class ChatOpenVINO extends SimpleChatModel {
  generateOptions: GenerationConfig;

  path: string;

  device: string;

  pipeline: Promise<any>;

  constructor(params: ChatOpenVINOParams) {
    super(params);
    this.path = params.modelPath;
    this.device = params.device || 'CPU';
    this.pipeline = LLMPipeline(this.path, this.device);
    this.generateOptions = params.generationConfig || {};
  }
  _llmType() {
    return 'OpenVINO';
  }
  private convertMessages(messages: BaseMessage[]): string {
    return messages
      .map((msg) => `${msg.getType().toUpperCase()}: "${msg.content}"`)
      .join('\n');
  }
  async _call(
    messages: BaseMessage[],
    options: this['ParsedCallOptions'],
    runManager?: CallbackManagerForLLMRun,
  ): Promise<string> {
    if (!messages.length) {
      throw new Error('No messages provided.');
    }
    if (typeof messages[0].content !== 'string') {
      throw new Error('Multimodal messages are not supported.');
    }
    const pipeline = await this.pipeline;

    // Signal setup
    const signals: AbortSignal[] = [];
    if (options.signal) {
      signals.push(options.signal);
    }
    if (options.timeout) {
      signals.push(AbortSignal.timeout(options.timeout));
    }
    const signal = AbortSignal.any(signals);

    // generation option setup
    const generateOptions: GenerationConfig = { ...this.generateOptions };
    if (options.stop) {
      const set = new Set(options.stop);
      generateOptions['stop_strings'] = set;
      generateOptions['include_stop_str_in_output'] = true;
    }

    // callback setup
    const callback = (chunk: string) => {
      runManager?.handleLLMNewToken(chunk).catch(console.error);

      return signal.aborted ? StreamingStatus.CANCEL : StreamingStatus.RUNNING;
    };

    const prompt = this.convertMessages(messages);

    const result = await pipeline.generate(
      prompt,
      generateOptions,
      callback,
    );
    // We need to throw an exception if the generation was canceled by a signal
    signal.throwIfAborted();

    return result;
  }

  async *_streamResponseChunks(
    messages: BaseMessage[],
    _options: this['ParsedCallOptions'],
    runManager?: CallbackManagerForLLMRun,
  ): AsyncGenerator<ChatGenerationChunk> {
    const pipeline = await this.pipeline;
    const prompt = this.convertMessages(messages);
    const generator = pipeline.stream(
      prompt,
      this.generateOptions,
    );
    for await (const chunk of generator) {
      yield new ChatGenerationChunk({
        message: new AIMessageChunk({
          content: chunk,
        }),
        text: chunk,
      });
      await runManager?.handleLLMNewToken(chunk);
    }
  }
}
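Because `ChatOpenVINO` implements both `_call` and `_streamResponseChunks`, the usual LangChain.js chat-model surface applies on top of it. Below is a small hedged sketch of how streaming and the `stop`/`timeout` call options handled in `_call` might be exercised; the model path is a placeholder:

```typescript
import { HumanMessage } from "@langchain/core/messages";
import { ChatOpenVINO } from "openvino-langchain";

const chat = new ChatOpenVINO({
  modelPath: "path-to-model", // placeholder
  device: "CPU",
  generationConfig: { "max_new_tokens": 100 },
});

// _streamResponseChunks backs the standard .stream() API:
// each chunk carries the incremental text produced by the pipeline.
const stream = await chat.stream([new HumanMessage("Tell me about OpenVINO.")]);
for await (const chunk of stream) {
  process.stdout.write(String(chunk.content));
}

// _call maps `stop` to stop_strings and `timeout` to an AbortSignal,
// so both can be passed as per-call options.
const reply = await chat.invoke([new HumanMessage("List three devices.")], {
  stop: ["\n\n"],
  timeout: 30_000,
});
console.log(reply.content);
```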
Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
 export * from './embeddings.js';
 export * from './llms.js';
+export * from './chat_models.js';
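With this line in place, the chat model resolves from the package root alongside the existing exports, for example (the model path is a placeholder):

```typescript
// All three classes are available from the package root after this change.
import { ChatOpenVINO, OpenVINO, OpenVINOEmbeddings } from "openvino-langchain";

const chat = new ChatOpenVINO({ modelPath: "path-to-model", device: "CPU" });
```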
