From f7359403d3438515ca7716b4893ee6f4d30b5c4e Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Wed, 12 Nov 2025 15:26:24 -0500 Subject: [PATCH 01/10] define hybrid class --- .../src/vectorstores/elasticsearch.ts | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 05a79d3aed4a..730b8c000bd6 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -24,6 +24,36 @@ interface VectorSearchOptions { readonly candidates?: number; } +/** + * Configuration options for hybrid retrieval strategy. + */ +export interface HybridRetrievalStrategyConfig { + rankWindowSize?: number; + rankConstant?: number; + textField?: string; + /** + * For Elasticsearch 9.x, set to `false` to include vectors in responses. + */ + excludeSourceVectors?: boolean; +} + +/** + * Hybrid search strategy combining vector and BM25 search using RRF. + */ +export class HybridRetrievalStrategy { + public readonly rankWindowSize: number; + public readonly rankConstant: number; + public readonly textField: string; + public readonly excludeSourceVectors?: boolean; + + constructor(config: HybridRetrievalStrategyConfig = {}) { + this.rankWindowSize = config.rankWindowSize ?? 100; + this.rankConstant = config.rankConstant ?? 60; + this.textField = config.textField ?? "text"; + this.excludeSourceVectors = config.excludeSourceVectors; + } +} + /** * Interface defining the arguments required to create an Elasticsearch * client. @@ -32,6 +62,7 @@ export interface ElasticClientArgs { readonly client: Client; readonly indexName?: string; readonly vectorSearchOptions?: VectorSearchOptions; + readonly strategy?: HybridRetrievalStrategy; } /** From a4ab6709ee29223947ae853c4b3a888ddc8d16ce Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Thu, 13 Nov 2025 12:59:06 -0500 Subject: [PATCH 02/10] add strategy, user agent --- .../src/vectorstores/elasticsearch.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 730b8c000bd6..717d0d0f8006 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -104,6 +104,8 @@ export class ElasticVectorSearch extends VectorStore { private readonly candidates: number; + private readonly strategy?: HybridRetrievalStrategy; + _vectorstoreType(): string { return "elasticsearch"; } @@ -116,9 +118,14 @@ export class ElasticVectorSearch extends VectorStore { this.m = args.vectorSearchOptions?.m ?? 16; this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 100; this.candidates = args.vectorSearchOptions?.candidates ?? 200; + this.strategy = args.strategy; + + const userAgent = this.strategy + ? "langchain-js-vs-hybrid/0.0.1" + : "langchain-js-vs/0.0.1"; this.client = args.client.child({ - headers: { "user-agent": "langchain-js-vs/0.0.1" }, + headers: { "user-agent": userAgent }, }); this.indexName = args.indexName ?? "documents"; } From d868ba39ae8be3e7fadeea13944b356502066b2a Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Thu, 13 Nov 2025 13:32:43 -0500 Subject: [PATCH 03/10] add lexical --- .../src/vectorstores/elasticsearch.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 717d0d0f8006..20575eb8a383 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -3,6 +3,7 @@ import { Client, estypes } from "@elastic/elasticsearch"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; +import type { Callbacks } from "@langchain/core/callbacks/manager"; /** * Type representing the k-nearest neighbors (k-NN) engine used in * Elasticsearch. @@ -106,6 +107,8 @@ export class ElasticVectorSearch extends VectorStore { private readonly strategy?: HybridRetrievalStrategy; + private lastQueryText?: string; + _vectorstoreType(): string { return "elasticsearch"; } @@ -193,6 +196,16 @@ export class ElasticVectorSearch extends VectorStore { return documentIds; } + async similaritySearch( + query: string, + k = 4, + filter?: ElasticFilter, + _callbacks?: Callbacks + ): Promise { + this.lastQueryText = query; + return super.similaritySearch(query, k, filter, _callbacks); + } + /** * Method to perform a similarity search in the Elasticsearch database * using a vector. It returns the k most similar documents along with @@ -353,6 +366,12 @@ export class ElasticVectorSearch extends VectorStore { }, }; + if (this.strategy?.excludeSourceVectors !== undefined) { + request.settings = { + "index.mapping.exclude_source_vectors": this.strategy.excludeSourceVectors, + }; + } + const indexExists = await this.doesIndexExist(); if (indexExists) return; From cfeea39e7aa4f2a7853ff08c56bd77b24ef642da Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Thu, 13 Nov 2025 15:30:01 -0500 Subject: [PATCH 04/10] add rrf retriever --- .../src/vectorstores/elasticsearch.ts | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 20575eb8a383..9b4e34f1aa69 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -220,6 +220,15 @@ export class ElasticVectorSearch extends VectorStore { k: number, filter?: ElasticFilter ): Promise<[Document, number][]> { + if (this.strategy && this.lastQueryText) { + return this.hybridSearchVectorWithScore( + this.lastQueryText, + query, + k, + filter + ); + } + const result = await this.client.search({ index: this.indexName, size: k, @@ -242,6 +251,58 @@ export class ElasticVectorSearch extends VectorStore { ]); } + private async hybridSearchVectorWithScore( + queryText: string, + queryVector: number[], + k: number, + filter?: ElasticFilter + ): Promise<[Document, number][]> { + const metadataTerms = this.buildMetadataTerms(filter); + const filterClauses = metadataTerms.must.length > 0 || metadataTerms.must_not.length > 0 + ? { bool: metadataTerms } + : undefined; + + const result = await this.client.search({ + index: this.indexName, + size: k, + retriever: { + rrf: { + retrievers: [ + { + standard: { + query: { + match: { + [this.strategy!.textField]: queryText, + }, + }, + }, + }, + { + knn: { + field: "embedding", + query_vector: queryVector, + k, + num_candidates: this.candidates, + }, + }, + ], + rank_window_size: this.strategy!.rankWindowSize, + rank_constant: this.strategy!.rankConstant, + }, + }, + ...(filterClauses && { query: filterClauses }), + }); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return result.hits.hits.map((hit: any) => [ + new Document({ + pageContent: hit._source.text, + metadata: hit._source.metadata, + }), + hit._score, + ]); + } + /** * Method to delete documents from the Elasticsearch database. * @param params Object containing the IDs of the documents to delete. From 9001bec943227cdbb60c1ce8c3506a939443340a Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Thu, 13 Nov 2025 17:53:12 -0500 Subject: [PATCH 05/10] tests for default --- .../tests/elasticsearch.int.test.ts | 102 +++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts index c33812dc49b2..0ce1eab5f412 100644 --- a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -2,7 +2,7 @@ import { test, expect } from "@jest/globals"; import { Client, ClientOptions } from "@elastic/elasticsearch"; import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; -import { ElasticVectorSearch } from "../elasticsearch.js"; +import { ElasticVectorSearch, HybridRetrievalStrategy } from "../elasticsearch.js"; describe("ElasticVectorSearch", () => { let store: ElasticVectorSearch; @@ -156,3 +156,103 @@ describe("ElasticVectorSearch", () => { ]); }); }); + +describe("ElasticVectorSearch - Backward Compatibility", () => { + let client: Client; + let embeddings: OpenAIEmbeddings; + + beforeEach(() => { + if (!process.env.ELASTIC_URL) { + throw new Error("ELASTIC_URL not set"); + } + + const config: ClientOptions = { + node: process.env.ELASTIC_URL, + }; + if (process.env.ELASTIC_API_KEY) { + config.auth = { + apiKey: process.env.ELASTIC_API_KEY, + }; + } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { + config.auth = { + username: process.env.ELASTIC_USERNAME, + password: process.env.ELASTIC_PASSWORD, + }; + } + client = new Client(config); + embeddings = new OpenAIEmbeddings(); + }); + + test.skip("Pure vector search without strategy works unchanged", async () => { + const indexName = "test_backward_compat_pure"; + const store = new ElasticVectorSearch(embeddings, { client, indexName }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "hello world" }), + new Document({ pageContent: "goodbye world" }), + new Document({ pageContent: "hello universe" }), + ]); + + const results = await store.similaritySearch("hello", 2); + + expect(results).toHaveLength(2); + expect(results[0]).toBeInstanceOf(Document); + expect(results[0].pageContent).toContain("hello"); + }); + + test.skip("similaritySearchVectorWithScore works without strategy", async () => { + const indexName = "test_backward_compat_scores"; + const store = new ElasticVectorSearch(embeddings, { client, indexName }); + await store.deleteIfExists(); + + const createdAt = new Date().getTime(); + await store.addDocuments([ + new Document({ pageContent: "vector search", metadata: { a: createdAt } }), + new Document({ pageContent: "semantic search", metadata: { a: createdAt } }), + new Document({ pageContent: "keyword search", metadata: { a: createdAt + 1 } }), + ]); + + const queryVector = await embeddings.embedQuery("vector"); + const results = await store.similaritySearchVectorWithScore( + queryVector, + 2, + { a: createdAt } + ); + + expect(results).toHaveLength(2); + results.forEach(([doc, score]) => { + expect(doc).toBeInstanceOf(Document); + expect(typeof score).toBe("number"); + expect(score).toBeGreaterThan(0); + expect(doc.metadata.a).toBe(createdAt); + }); + }); + + test.skip("fromTexts static method works without strategy", async () => { + const indexName = "test_backward_compat_fromtexts"; + + const store = await ElasticVectorSearch.fromTexts( + ["first document", "second document", "third document"], + [{ id: 1 }, { id: 2 }, { id: 3 }], + embeddings, + { client, indexName } + ); + + await store.deleteIfExists(); + + const newStore = await ElasticVectorSearch.fromTexts( + ["first document", "second document", "third document"], + [{ id: 1 }, { id: 2 }, { id: 3 }], + embeddings, + { client, indexName } + ); + + const results = await newStore.similaritySearch("first", 1); + + expect(results).toHaveLength(1); + expect(results[0]).toBeInstanceOf(Document); + expect(results[0].pageContent).toBe("first document"); + expect(results[0].metadata.id).toBe(1); + }); +}); From 3950bb692903c539577827ca1e0ae2829940c415 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Thu, 13 Nov 2025 19:59:27 -0500 Subject: [PATCH 06/10] tests for default --- .../tests/elasticsearch.int.test.ts | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts index 0ce1eab5f412..6bf90323092b 100644 --- a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -256,3 +256,143 @@ describe("ElasticVectorSearch - Backward Compatibility", () => { expect(results[0].metadata.id).toBe(1); }); }); + +describe("ElasticVectorSearch - Hybrid Search", () => { + let client: Client; + let embeddings: OpenAIEmbeddings; + + beforeEach(() => { + if (!process.env.ELASTIC_URL) { + throw new Error("ELASTIC_URL not set"); + } + + const config: ClientOptions = { + node: process.env.ELASTIC_URL, + }; + if (process.env.ELASTIC_API_KEY) { + config.auth = { + apiKey: process.env.ELASTIC_API_KEY, + }; + } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { + config.auth = { + username: process.env.ELASTIC_USERNAME, + password: process.env.ELASTIC_PASSWORD, + }; + } + client = new Client(config); + embeddings = new OpenAIEmbeddings(); + }); + + test.skip("Hybrid search with default strategy", async () => { + const indexName = "test_hybrid_default"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy(), + }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "The quick brown fox jumps over the lazy dog" }), + new Document({ pageContent: "Machine learning and artificial intelligence" }), + new Document({ pageContent: "Elasticsearch vector search capabilities" }), + new Document({ pageContent: "A fox in the forest during autumn" }), + ]); + + const results = await store.similaritySearch("fox in the woods", 2); + + expect(results).toHaveLength(2); + expect(results[0]).toBeInstanceOf(Document); + expect(results.some(doc => doc.pageContent.includes("fox"))).toBe(true); + }); + + test.skip("Hybrid search with custom RRF parameters", async () => { + const indexName = "test_hybrid_custom_rrf"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy({ + rankWindowSize: 200, + rankConstant: 80, + textField: "text", + }), + }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "search engines and databases" }), + new Document({ pageContent: "vector embeddings for search" }), + new Document({ pageContent: "neural networks and deep learning" }), + ]); + + const results = await store.similaritySearch("search technology", 2); + + expect(results).toHaveLength(2); + expect(results[0]).toBeInstanceOf(Document); + }); + + test.skip("Hybrid search returns scores correctly", async () => { + const indexName = "test_hybrid_scores"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy(), + }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "Elasticsearch hybrid search" }), + new Document({ pageContent: "Vector similarity search" }), + new Document({ pageContent: "Full text search with BM25" }), + ]); + + const queryVector = await embeddings.embedQuery("hybrid search"); + const results = await store.similaritySearchVectorWithScore(queryVector, 3); + + expect(results).toHaveLength(3); + results.forEach(([doc, score]) => { + expect(doc).toBeInstanceOf(Document); + expect(typeof score).toBe("number"); + expect(score).toBeGreaterThan(0); + }); + }); + + test.skip("Hybrid search with metadata filters", async () => { + const indexName = "test_hybrid_filters"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy(), + }); + await store.deleteIfExists(); + + const createdAt = new Date().getTime(); + await store.addDocuments([ + new Document({ + pageContent: "Technology article about AI", + metadata: { category: "tech", date: createdAt } + }), + new Document({ + pageContent: "Sports article about football", + metadata: { category: "sports", date: createdAt } + }), + new Document({ + pageContent: "Technology article about ML", + metadata: { category: "tech", date: createdAt } + }), + new Document({ + pageContent: "Sports article about basketball", + metadata: { category: "sports", date: createdAt + 1 } + }), + ]); + + const results = await store.similaritySearch("article about technology", 5, { + category: "tech", + }); + + expect(results.length).toBeLessThanOrEqual(2); + results.forEach(doc => { + expect(doc.metadata.category).toBe("tech"); + }); + }); +}); From c28160b744811eb0710e108e5dc55293cb038028 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Fri, 14 Nov 2025 10:16:04 -0500 Subject: [PATCH 07/10] tests --- .../src/vectorstores/elasticsearch.ts | 2 +- .../tests/elasticsearch.int.test.ts | 98 +++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 9b4e34f1aa69..7a94e8791075 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -33,7 +33,7 @@ export interface HybridRetrievalStrategyConfig { rankConstant?: number; textField?: string; /** - * For Elasticsearch 9.x, set to `false` to include vectors in responses. + * For Elasticsearch 9.2, set to `false` to include vectors in responses. */ excludeSourceVectors?: boolean; } diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts index 6bf90323092b..183c239e1858 100644 --- a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -396,3 +396,101 @@ describe("ElasticVectorSearch - Hybrid Search", () => { }); }); }); + +describe("ElasticVectorSearch - ES 9.x Compatibility", () => { + let client: Client; + let embeddings: OpenAIEmbeddings; + + beforeEach(() => { + if (!process.env.ELASTIC_URL) { + throw new Error("ELASTIC_URL not set"); + } + + const config: ClientOptions = { + node: process.env.ELASTIC_URL, + }; + if (process.env.ELASTIC_API_KEY) { + config.auth = { + apiKey: process.env.ELASTIC_API_KEY, + }; + } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { + config.auth = { + username: process.env.ELASTIC_USERNAME, + password: process.env.ELASTIC_PASSWORD, + }; + } + client = new Client(config); + embeddings = new OpenAIEmbeddings(); + }); + + test.skip("Hybrid search with excludeSourceVectors set to false for ES 9.x", async () => { + const indexName = "test_es9_exclude_false"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy({ + excludeSourceVectors: false, + }), + }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "Document for ES 9.x testing" }), + new Document({ pageContent: "Another document for compatibility" }), + ]); + + const results = await store.similaritySearch("testing", 2); + + expect(results).toHaveLength(2); + expect(results[0]).toBeInstanceOf(Document); + + const indexSettings = await client.indices.getSettings({ + index: indexName, + }); + expect( + indexSettings[indexName].settings?.index?.mapping?.exclude_source_vectors + ).toBe("false"); + }); + + test.skip("Hybrid search with excludeSourceVectors undefined uses ES defaults", async () => { + const indexName = "test_es_default_exclude"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy(), + }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "Test with default settings" }), + ]); + + const results = await store.similaritySearch("test", 1); + + expect(results).toHaveLength(1); + expect(results[0]).toBeInstanceOf(Document); + }); + + test.skip("Pure vector search with excludeSourceVectors for ES 9.x", async () => { + const indexName = "test_es9_pure_vector"; + const store = new ElasticVectorSearch(embeddings, { + client, + indexName, + strategy: new HybridRetrievalStrategy({ + excludeSourceVectors: false, + }), + }); + await store.deleteIfExists(); + + await store.addDocuments([ + new Document({ pageContent: "ES 9.x pure vector test" }), + ]); + + const queryVector = await embeddings.embedQuery("vector test"); + const results = await store.similaritySearchVectorWithScore(queryVector, 1); + + expect(results).toHaveLength(1); + expect(results[0][0]).toBeInstanceOf(Document); + expect(typeof results[0][1]).toBe("number"); + }); +}); From 73246de86c5dc9803d64e4df6fc486e6ffc23220 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 17 Nov 2025 10:58:42 -0500 Subject: [PATCH 08/10] docs and examples --- .../elasticsearch/elasticsearch_hybrid.ts | 140 ++++++++++++++++++ .../src/vectorstores/elasticsearch.ts | 46 ++++-- .../tests/elasticsearch.int.test.ts | 23 +-- 3 files changed, 180 insertions(+), 29 deletions(-) create mode 100644 examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts diff --git a/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts b/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts new file mode 100644 index 000000000000..05192a7d418f --- /dev/null +++ b/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts @@ -0,0 +1,140 @@ +import { Client, ClientOptions } from "@elastic/elasticsearch"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { + ElasticClientArgs, + ElasticVectorSearch, + HybridRetrievalStrategy, +} from "@langchain/community/vectorstores/elasticsearch"; +import { Document } from "@langchain/core/documents"; + +/** + * Demonstrates hybrid search with Elasticsearch, combining: + * - Vector (semantic) search using embeddings + * - BM25 (lexical) full-text search + * - Reciprocal Rank Fusion (RRF) for result merging + * + * Requirements: + * - Elasticsearch 8.9+ (for RRF support) + * - Run: docker-compose up -d --build (in elasticsearch directory) + * - Set ELASTIC_URL, ELASTIC_API_KEY (or ELASTIC_USERNAME/ELASTIC_PASSWORD) + */ +export async function run() { + // Configure Elasticsearch client + const config: ClientOptions = { + node: process.env.ELASTIC_URL ?? "http://127.0.0.1:9200", + }; + if (process.env.ELASTIC_API_KEY) { + config.auth = { + apiKey: process.env.ELASTIC_API_KEY, + }; + } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { + config.auth = { + username: process.env.ELASTIC_USERNAME, + password: process.env.ELASTIC_PASSWORD, + }; + } + + const embeddings = new OpenAIEmbeddings(); + + // Create vector store with hybrid search strategy + const clientArgs: ElasticClientArgs = { + client: new Client(config), + indexName: process.env.ELASTIC_INDEX ?? "test_hybrid_search", + strategy: new HybridRetrievalStrategy({ + rankWindowSize: 100, // Number of documents to consider for RRF + rankConstant: 60, // RRF constant for score normalization + textField: "text", // Field to use for BM25 search + }), + }; + + const vectorStore = new ElasticVectorSearch(embeddings, clientArgs); + + // Clean up any existing data + await vectorStore.deleteIfExists(); + + // Add sample documents + const docs = [ + new Document({ + pageContent: "Running helps build cardiovascular endurance and strengthens leg muscles.", + metadata: { category: "fitness", topic: "running" }, + }), + new Document({ + pageContent: "Marathon training requires consistent mileage and proper recovery.", + metadata: { category: "fitness", topic: "running" }, + }), + new Document({ + pageContent: "Muscle soreness after exercise is caused by microscopic damage to muscle fibers.", + metadata: { category: "health", topic: "recovery" }, + }), + new Document({ + pageContent: "Stretching and foam rolling can help prevent post-workout muscle pain.", + metadata: { category: "health", topic: "recovery" }, + }), + new Document({ + pageContent: "Python is a popular programming language for data science and machine learning.", + metadata: { category: "technology", topic: "programming" }, + }), + ]; + + console.log("Adding documents to Elasticsearch..."); + await vectorStore.addDocuments(docs); + console.log("Documents added successfully!\n"); + + // Example 1: Hybrid search combines semantic + keyword matching + console.log("=== Example 1: Hybrid Search ==="); + const query1 = "How to avoid muscle soreness while running?"; + console.log(`Query: "${query1}"\n`); + + const results1 = await vectorStore.similaritySearchWithScore(query1, 3); + results1.forEach(([doc, score], i) => { + console.log(`${i + 1}. [Score: ${score.toFixed(4)}] ${doc.pageContent}`); + console.log(` Metadata: ${JSON.stringify(doc.metadata)}\n`); + }); + + // Example 2: Semantic search works well for conceptual queries + console.log("\n=== Example 2: Semantic Query ==="); + const query2 = "tips for preventing pain after workouts"; + console.log(`Query: "${query2}"\n`); + + const results2 = await vectorStore.similaritySearchWithScore(query2, 2); + results2.forEach(([doc, score], i) => { + console.log(`${i + 1}. [Score: ${score.toFixed(4)}] ${doc.pageContent}`); + console.log(` Metadata: ${JSON.stringify(doc.metadata)}\n`); + }); + + // Example 3: With metadata filters + console.log("\n=== Example 3: Hybrid Search with Filters ==="); + const query3 = "fitness advice"; + console.log(`Query: "${query3}"`); + console.log(`Filter: category = "fitness"\n`); + + const results3 = await vectorStore.similaritySearchWithScore( + query3, + 3, + { category: "fitness" } + ); + results3.forEach(([doc, score], i) => { + console.log(`${i + 1}. [Score: ${score.toFixed(4)}] ${doc.pageContent}`); + console.log(` Metadata: ${JSON.stringify(doc.metadata)}\n`); + }); + + // Clean up + console.log("\n=== Cleanup ==="); + await vectorStore.deleteIfExists(); + console.log("Index deleted."); +} + +/** + * For Elasticsearch 9.2+: + * If you need to include vectors in the response, set includeSourceVectors: + * + * strategy: new HybridRetrievalStrategy({ + * includeSourceVectors: true, + * rankWindowSize: 100, + * rankConstant: 60, + * }) + * + * Note: This is only needed if you're on ES 9.2+ and want vector data + * in search responses. ES 9.2+ excludes vectors by default for performance. + */ + diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 7a94e8791075..3cc1dea9a7b0 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -33,9 +33,14 @@ export interface HybridRetrievalStrategyConfig { rankConstant?: number; textField?: string; /** - * For Elasticsearch 9.2, set to `false` to include vectors in responses. + * Include source vectors in search responses. + * + * Elasticsearch 9.2+ excludes vectors from `_source` by default. + * Set to `true` to include vectors in responses for ES 9.2+. + * + * Note: ES < 8.19 does not support this parameter. */ - excludeSourceVectors?: boolean; + includeSourceVectors?: boolean; } /** @@ -45,13 +50,13 @@ export class HybridRetrievalStrategy { public readonly rankWindowSize: number; public readonly rankConstant: number; public readonly textField: string; - public readonly excludeSourceVectors?: boolean; + public readonly includeSourceVectors?: boolean; constructor(config: HybridRetrievalStrategyConfig = {}) { this.rankWindowSize = config.rankWindowSize ?? 100; this.rankConstant = config.rankConstant ?? 60; this.textField = config.textField ?? "text"; - this.excludeSourceVectors = config.excludeSourceVectors; + this.includeSourceVectors = config.includeSourceVectors; } } @@ -83,10 +88,23 @@ type ElasticMetadataTerms = { }; /** - * Class for interacting with an Elasticsearch database. It extends the - * VectorStore base class and provides methods for adding documents and - * vectors to the Elasticsearch database, performing similarity searches, - * deleting documents, and more. + * Elasticsearch vector store supporting vector and hybrid search. + * + * Hybrid search combines kNN vector search with BM25 full-text search + * using RRF. Enable by passing a `HybridRetrievalStrategy` to the constructor. + * + * @example + * ```typescript + * // Vector search (default) + * const vectorStore = new ElasticVectorSearch(embeddings, { client, indexName }); + * + * // Hybrid search + * const hybridStore = new ElasticVectorSearch(embeddings, { + * client, + * indexName, + * strategy: new HybridRetrievalStrategy() + * }); + * ``` */ export class ElasticVectorSearch extends VectorStore { declare FilterType: ElasticFilter; @@ -239,6 +257,9 @@ export class ElasticVectorSearch extends VectorStore { k, num_candidates: this.candidates, }, + ...(this.strategy?.includeSourceVectors === true && { + _source: { includes: ["*"] }, + }), }); // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -291,6 +312,9 @@ export class ElasticVectorSearch extends VectorStore { }, }, ...(filterClauses && { query: filterClauses }), + ...(this.strategy?.includeSourceVectors === true && { + _source: { includes: ["*"] }, + }), }); // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -427,12 +451,6 @@ export class ElasticVectorSearch extends VectorStore { }, }; - if (this.strategy?.excludeSourceVectors !== undefined) { - request.settings = { - "index.mapping.exclude_source_vectors": this.strategy.excludeSourceVectors, - }; - } - const indexExists = await this.doesIndexExist(); if (indexExists) return; diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts index 183c239e1858..f9244f8d402a 100644 --- a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -423,19 +423,19 @@ describe("ElasticVectorSearch - ES 9.x Compatibility", () => { embeddings = new OpenAIEmbeddings(); }); - test.skip("Hybrid search with excludeSourceVectors set to false for ES 9.x", async () => { - const indexName = "test_es9_exclude_false"; + test.skip("Hybrid search with includeSourceVectors set to true for ES 9.2+", async () => { + const indexName = "test_es9_include_vectors"; const store = new ElasticVectorSearch(embeddings, { client, indexName, strategy: new HybridRetrievalStrategy({ - excludeSourceVectors: false, + includeSourceVectors: true, }), }); await store.deleteIfExists(); await store.addDocuments([ - new Document({ pageContent: "Document for ES 9.x testing" }), + new Document({ pageContent: "Document for ES 9.2+ testing" }), new Document({ pageContent: "Another document for compatibility" }), ]); @@ -443,17 +443,10 @@ describe("ElasticVectorSearch - ES 9.x Compatibility", () => { expect(results).toHaveLength(2); expect(results[0]).toBeInstanceOf(Document); - - const indexSettings = await client.indices.getSettings({ - index: indexName, - }); - expect( - indexSettings[indexName].settings?.index?.mapping?.exclude_source_vectors - ).toBe("false"); }); - test.skip("Hybrid search with excludeSourceVectors undefined uses ES defaults", async () => { - const indexName = "test_es_default_exclude"; + test.skip("Hybrid search with includeSourceVectors undefined uses ES defaults", async () => { + const indexName = "test_es_default_include"; const store = new ElasticVectorSearch(embeddings, { client, indexName, @@ -471,13 +464,13 @@ describe("ElasticVectorSearch - ES 9.x Compatibility", () => { expect(results[0]).toBeInstanceOf(Document); }); - test.skip("Pure vector search with excludeSourceVectors for ES 9.x", async () => { + test.skip("Pure vector search with includeSourceVectors for ES 9.2+", async () => { const indexName = "test_es9_pure_vector"; const store = new ElasticVectorSearch(embeddings, { client, indexName, strategy: new HybridRetrievalStrategy({ - excludeSourceVectors: false, + includeSourceVectors: true, }), }); await store.deleteIfExists(); From f0383b6b69d279091732dd2445393bba5c3c1d73 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 18 Nov 2025 13:43:25 -0500 Subject: [PATCH 09/10] remove 9.2 compliance --- .../elasticsearch/elasticsearch_hybrid.ts | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts b/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts index 05192a7d418f..7aebee3380e6 100644 --- a/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts +++ b/examples/src/langchain-classic/indexes/vector_stores/elasticsearch/elasticsearch_hybrid.ts @@ -19,7 +19,6 @@ import { Document } from "@langchain/core/documents"; * - Set ELASTIC_URL, ELASTIC_API_KEY (or ELASTIC_USERNAME/ELASTIC_PASSWORD) */ export async function run() { - // Configure Elasticsearch client const config: ClientOptions = { node: process.env.ELASTIC_URL ?? "http://127.0.0.1:9200", }; @@ -36,20 +35,18 @@ export async function run() { const embeddings = new OpenAIEmbeddings(); - // Create vector store with hybrid search strategy const clientArgs: ElasticClientArgs = { client: new Client(config), indexName: process.env.ELASTIC_INDEX ?? "test_hybrid_search", strategy: new HybridRetrievalStrategy({ - rankWindowSize: 100, // Number of documents to consider for RRF - rankConstant: 60, // RRF constant for score normalization - textField: "text", // Field to use for BM25 search + rankWindowSize: 100, + rankConstant: 60, + textField: "text", }), }; const vectorStore = new ElasticVectorSearch(embeddings, clientArgs); - // Clean up any existing data await vectorStore.deleteIfExists(); // Add sample documents @@ -124,17 +121,3 @@ export async function run() { console.log("Index deleted."); } -/** - * For Elasticsearch 9.2+: - * If you need to include vectors in the response, set includeSourceVectors: - * - * strategy: new HybridRetrievalStrategy({ - * includeSourceVectors: true, - * rankWindowSize: 100, - * rankConstant: 60, - * }) - * - * Note: This is only needed if you're on ES 9.2+ and want vector data - * in search responses. ES 9.2+ excludes vectors by default for performance. - */ - From 9e26a6b0ec1fc71ea11d52cc10a387be781f2fc4 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Wed, 19 Nov 2025 10:19:01 -0500 Subject: [PATCH 10/10] remove 9.2 compliance --- .../src/vectorstores/elasticsearch.ts | 17 ---- .../tests/elasticsearch.int.test.ts | 91 ------------------- 2 files changed, 108 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index 3cc1dea9a7b0..dc0a459dd446 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -32,15 +32,6 @@ export interface HybridRetrievalStrategyConfig { rankWindowSize?: number; rankConstant?: number; textField?: string; - /** - * Include source vectors in search responses. - * - * Elasticsearch 9.2+ excludes vectors from `_source` by default. - * Set to `true` to include vectors in responses for ES 9.2+. - * - * Note: ES < 8.19 does not support this parameter. - */ - includeSourceVectors?: boolean; } /** @@ -50,13 +41,11 @@ export class HybridRetrievalStrategy { public readonly rankWindowSize: number; public readonly rankConstant: number; public readonly textField: string; - public readonly includeSourceVectors?: boolean; constructor(config: HybridRetrievalStrategyConfig = {}) { this.rankWindowSize = config.rankWindowSize ?? 100; this.rankConstant = config.rankConstant ?? 60; this.textField = config.textField ?? "text"; - this.includeSourceVectors = config.includeSourceVectors; } } @@ -257,9 +246,6 @@ export class ElasticVectorSearch extends VectorStore { k, num_candidates: this.candidates, }, - ...(this.strategy?.includeSourceVectors === true && { - _source: { includes: ["*"] }, - }), }); // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -312,9 +298,6 @@ export class ElasticVectorSearch extends VectorStore { }, }, ...(filterClauses && { query: filterClauses }), - ...(this.strategy?.includeSourceVectors === true && { - _source: { includes: ["*"] }, - }), }); // eslint-disable-next-line @typescript-eslint/no-explicit-any diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts index f9244f8d402a..6bf90323092b 100644 --- a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -396,94 +396,3 @@ describe("ElasticVectorSearch - Hybrid Search", () => { }); }); }); - -describe("ElasticVectorSearch - ES 9.x Compatibility", () => { - let client: Client; - let embeddings: OpenAIEmbeddings; - - beforeEach(() => { - if (!process.env.ELASTIC_URL) { - throw new Error("ELASTIC_URL not set"); - } - - const config: ClientOptions = { - node: process.env.ELASTIC_URL, - }; - if (process.env.ELASTIC_API_KEY) { - config.auth = { - apiKey: process.env.ELASTIC_API_KEY, - }; - } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { - config.auth = { - username: process.env.ELASTIC_USERNAME, - password: process.env.ELASTIC_PASSWORD, - }; - } - client = new Client(config); - embeddings = new OpenAIEmbeddings(); - }); - - test.skip("Hybrid search with includeSourceVectors set to true for ES 9.2+", async () => { - const indexName = "test_es9_include_vectors"; - const store = new ElasticVectorSearch(embeddings, { - client, - indexName, - strategy: new HybridRetrievalStrategy({ - includeSourceVectors: true, - }), - }); - await store.deleteIfExists(); - - await store.addDocuments([ - new Document({ pageContent: "Document for ES 9.2+ testing" }), - new Document({ pageContent: "Another document for compatibility" }), - ]); - - const results = await store.similaritySearch("testing", 2); - - expect(results).toHaveLength(2); - expect(results[0]).toBeInstanceOf(Document); - }); - - test.skip("Hybrid search with includeSourceVectors undefined uses ES defaults", async () => { - const indexName = "test_es_default_include"; - const store = new ElasticVectorSearch(embeddings, { - client, - indexName, - strategy: new HybridRetrievalStrategy(), - }); - await store.deleteIfExists(); - - await store.addDocuments([ - new Document({ pageContent: "Test with default settings" }), - ]); - - const results = await store.similaritySearch("test", 1); - - expect(results).toHaveLength(1); - expect(results[0]).toBeInstanceOf(Document); - }); - - test.skip("Pure vector search with includeSourceVectors for ES 9.2+", async () => { - const indexName = "test_es9_pure_vector"; - const store = new ElasticVectorSearch(embeddings, { - client, - indexName, - strategy: new HybridRetrievalStrategy({ - includeSourceVectors: true, - }), - }); - await store.deleteIfExists(); - - await store.addDocuments([ - new Document({ pageContent: "ES 9.x pure vector test" }), - ]); - - const queryVector = await embeddings.embedQuery("vector test"); - const results = await store.similaritySearchVectorWithScore(queryVector, 1); - - expect(results).toHaveLength(1); - expect(results[0][0]).toBeInstanceOf(Document); - expect(typeof results[0][1]).toBe("number"); - }); -});