Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import { Client, ClientOptions } from "@elastic/elasticsearch";
import { OpenAIEmbeddings } from "@langchain/openai";
import {
ElasticClientArgs,
ElasticVectorSearch,
HybridRetrievalStrategy,
} from "@langchain/community/vectorstores/elasticsearch";
import { Document } from "@langchain/core/documents";

/**
 * End-to-end hybrid search example for the Elasticsearch vector store.
 *
 * The store is configured with a `HybridRetrievalStrategy`, which combines:
 * - vector (semantic) search over embeddings,
 * - BM25 (lexical) full-text search,
 * - Reciprocal Rank Fusion (RRF) to merge both result lists.
 *
 * Requirements:
 * - Elasticsearch 8.9+ (for RRF support)
 *   NOTE(review): the store issues the search `retriever` API — confirm the
 *   minimum Elasticsearch version that supports it.
 * - Run: docker-compose up -d --build (in elasticsearch directory)
 * - Set ELASTIC_URL, ELASTIC_API_KEY (or ELASTIC_USERNAME/ELASTIC_PASSWORD)
 *
 * NOTE(review): every query below goes through `similaritySearchWithScore`.
 * Verify that this entry point actually reaches the hybrid code path of the
 * store; otherwise the example only demonstrates plain vector search.
 */
export async function run() {
  const {
    ELASTIC_URL,
    ELASTIC_API_KEY,
    ELASTIC_USERNAME,
    ELASTIC_PASSWORD,
    ELASTIC_INDEX,
  } = process.env;

  // Connection settings: an API key wins over basic auth; with neither set the
  // client connects unauthenticated.
  const config: ClientOptions = {
    node: ELASTIC_URL ?? "http://127.0.0.1:9200",
  };
  if (ELASTIC_API_KEY) {
    config.auth = { apiKey: ELASTIC_API_KEY };
  } else if (ELASTIC_USERNAME && ELASTIC_PASSWORD) {
    config.auth = { username: ELASTIC_USERNAME, password: ELASTIC_PASSWORD };
  }

  const embeddings = new OpenAIEmbeddings();

  const strategy = new HybridRetrievalStrategy({
    rankWindowSize: 100,
    rankConstant: 60,
    textField: "text",
  });
  const clientArgs: ElasticClientArgs = {
    client: new Client(config),
    indexName: ELASTIC_INDEX ?? "test_hybrid_search",
    strategy,
  };
  const vectorStore = new ElasticVectorSearch(embeddings, clientArgs);

  // Start from a clean index so repeated runs do not accumulate documents.
  await vectorStore.deleteIfExists();

  // Sample corpus as [pageContent, category, topic] rows.
  const corpus: [string, string, string][] = [
    [
      "Running helps build cardiovascular endurance and strengthens leg muscles.",
      "fitness",
      "running",
    ],
    [
      "Marathon training requires consistent mileage and proper recovery.",
      "fitness",
      "running",
    ],
    [
      "Muscle soreness after exercise is caused by microscopic damage to muscle fibers.",
      "health",
      "recovery",
    ],
    [
      "Stretching and foam rolling can help prevent post-workout muscle pain.",
      "health",
      "recovery",
    ],
    [
      "Python is a popular programming language for data science and machine learning.",
      "technology",
      "programming",
    ],
  ];
  const docs = corpus.map(
    ([pageContent, category, topic]) =>
      new Document({ pageContent, metadata: { category, topic } })
  );

  // Prints one ranked line plus the metadata line for every scored hit.
  const printHits = (
    hits: ReadonlyArray<
      readonly [Pick<Document, "pageContent" | "metadata">, number]
    >
  ): void => {
    for (const [position, [hit, score]] of hits.entries()) {
      console.log(`${position + 1}. [Score: ${score.toFixed(4)}] ${hit.pageContent}`);
      console.log(` Metadata: ${JSON.stringify(hit.metadata)}\n`);
    }
  };

  console.log("Adding documents to Elasticsearch...");
  await vectorStore.addDocuments(docs);
  console.log("Documents added successfully!\n");

  // Example 1: Hybrid search combines semantic + keyword matching
  console.log("=== Example 1: Hybrid Search ===");
  const query1 = "How to avoid muscle soreness while running?";
  console.log(`Query: "${query1}"\n`);
  printHits(await vectorStore.similaritySearchWithScore(query1, 3));

  // Example 2: Semantic search works well for conceptual queries
  console.log("\n=== Example 2: Semantic Query ===");
  const query2 = "tips for preventing pain after workouts";
  console.log(`Query: "${query2}"\n`);
  printHits(await vectorStore.similaritySearchWithScore(query2, 2));

  // Example 3: With metadata filters
  console.log("\n=== Example 3: Hybrid Search with Filters ===");
  const query3 = "fitness advice";
  console.log(`Query: "${query3}"`);
  console.log(`Filter: category = "fitness"\n`);
  printHits(
    await vectorStore.similaritySearchWithScore(query3, 3, {
      category: "fitness",
    })
  );

  // Clean up
  console.log("\n=== Cleanup ===");
  await vectorStore.deleteIfExists();
  console.log("Index deleted.");
}

129 changes: 124 additions & 5 deletions libs/langchain-community/src/vectorstores/elasticsearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Client, estypes } from "@elastic/elasticsearch";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import type { Callbacks } from "@langchain/core/callbacks/manager";
/**
* Type representing the k-nearest neighbors (k-NN) engine used in
* Elasticsearch.
Expand All @@ -24,6 +25,30 @@ interface VectorSearchOptions {
readonly candidates?: number;
}

/**
 * Optional settings accepted by {@link HybridRetrievalStrategy}. Every field
 * may be omitted; the strategy substitutes its own default.
 */
export interface HybridRetrievalStrategyConfig {
  /** Forwarded as `rank_window_size` of the RRF retriever. Defaults to 100. */
  rankWindowSize?: number;
  /** Forwarded as `rank_constant` of the RRF retriever. Defaults to 60. */
  rankConstant?: number;
  /** Name of the index field the full-text `match` query targets. Defaults to "text". */
  textField?: string;
}

/**
 * Immutable settings object that switches `ElasticVectorSearch` to hybrid
 * retrieval: vector search and BM25 full-text search merged with Reciprocal
 * Rank Fusion (RRF).
 */
export class HybridRetrievalStrategy {
  public readonly rankWindowSize: number;

  public readonly rankConstant: number;

  public readonly textField: string;

  /**
   * @param config Partial settings; any field left `undefined` (or `null`)
   * falls back to its default.
   */
  constructor(config: HybridRetrievalStrategyConfig = {}) {
    const { rankWindowSize, rankConstant, textField } = config;

    // `??` rather than `||` so explicit falsy values such as 0 or "" are kept.
    this.rankWindowSize = rankWindowSize ?? 100;
    this.rankConstant = rankConstant ?? 60;
    this.textField = textField ?? "text";
  }
}

/**
* Interface defining the arguments required to create an Elasticsearch
* client.
Expand All @@ -32,6 +57,7 @@ export interface ElasticClientArgs {
readonly client: Client;
readonly indexName?: string;
readonly vectorSearchOptions?: VectorSearchOptions;
readonly strategy?: HybridRetrievalStrategy;
}

/**
Expand All @@ -51,10 +77,23 @@ type ElasticMetadataTerms = {
};

/**
* Class for interacting with an Elasticsearch database. It extends the
* VectorStore base class and provides methods for adding documents and
* vectors to the Elasticsearch database, performing similarity searches,
* deleting documents, and more.
* Elasticsearch vector store supporting vector and hybrid search.
*
* Hybrid search combines kNN vector search with BM25 full-text search
* using RRF. Enable by passing a `HybridRetrievalStrategy` to the constructor.
*
* @example
* ```typescript
* // Vector search (default)
* const vectorStore = new ElasticVectorSearch(embeddings, { client, indexName });
*
* // Hybrid search
* const hybridStore = new ElasticVectorSearch(embeddings, {
* client,
* indexName,
* strategy: new HybridRetrievalStrategy()
* });
* ```
*/
export class ElasticVectorSearch extends VectorStore {
declare FilterType: ElasticFilter;
Expand All @@ -73,6 +112,10 @@ export class ElasticVectorSearch extends VectorStore {

private readonly candidates: number;

private readonly strategy?: HybridRetrievalStrategy;

private lastQueryText?: string;

_vectorstoreType(): string {
return "elasticsearch";
}
Expand All @@ -85,9 +128,14 @@ export class ElasticVectorSearch extends VectorStore {
this.m = args.vectorSearchOptions?.m ?? 16;
this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 100;
this.candidates = args.vectorSearchOptions?.candidates ?? 200;
this.strategy = args.strategy;

const userAgent = this.strategy
? "langchain-js-vs-hybrid/0.0.1"
: "langchain-js-vs/0.0.1";

this.client = args.client.child({
headers: { "user-agent": "langchain-js-vs/0.0.1" },
headers: { "user-agent": userAgent },
});
this.indexName = args.indexName ?? "documents";
}
Expand Down Expand Up @@ -155,6 +203,16 @@ export class ElasticVectorSearch extends VectorStore {
return documentIds;
}

/**
 * Text similarity search returning the `k` best matching documents.
 *
 * When a hybrid strategy is configured and `query` is non-empty, the query is
 * embedded here and both the raw text and its vector are handed straight to
 * the hybrid search. The text is deliberately NOT stored on the instance:
 * keeping it in a shared field lets concurrent searches overwrite each other's
 * text and lets a later vector-only search be fused with the text of an
 * earlier, unrelated query.
 *
 * Without a strategy (or with an empty query) this defers to the base-class
 * implementation, i.e. plain vector search.
 *
 * The default values mirror those of the `VectorStore` base class this class
 * extends.
 *
 * NOTE(review): other text entry points (e.g. `similaritySearchWithScore`)
 * do not pass through this override — confirm whether they should also run
 * hybrid search.
 *
 * @param query Text to search for.
 * @param k Number of documents to return.
 * @param filter Optional metadata filter.
 * @param _callbacks Forwarded to the base class on the non-hybrid path only.
 * @returns The matching documents, without scores.
 */
async similaritySearch(
  query: string,
  k = 4,
  filter?: ElasticFilter,
  _callbacks?: Callbacks
): Promise<Document[]> {
  if (this.strategy && query) {
    const queryVector = await this.embeddings.embedQuery(query);
    const scoredHits = await this.hybridSearchVectorWithScore(
      query,
      queryVector,
      k,
      filter
    );
    return scoredHits.map(([doc]) => doc);
  }

  return super.similaritySearch(query, k, filter, _callbacks);
}

/**
* Method to perform a similarity search in the Elasticsearch database
* using a vector. It returns the k most similar documents along with
Expand All @@ -169,6 +227,15 @@ export class ElasticVectorSearch extends VectorStore {
k: number,
filter?: ElasticFilter
): Promise<[Document, number][]> {
if (this.strategy && this.lastQueryText) {
return this.hybridSearchVectorWithScore(
this.lastQueryText,
query,
k,
filter
);
}

const result = await this.client.search({
index: this.indexName,
size: k,
Expand All @@ -191,6 +258,58 @@ export class ElasticVectorSearch extends VectorStore {
]);
}

/**
 * Runs a hybrid search: a full-text `match` retriever and a kNN retriever,
 * fused with Reciprocal Rank Fusion (RRF) via the search `retriever` API.
 *
 * @param queryText Raw query text, matched against the strategy's text field.
 * @param queryVector Embedding of the query, searched in the "embedding" field.
 * @param k Number of hits to return.
 * @param filter Optional metadata filter; applied to both sub-retrievers.
 * @returns `[Document, score]` tuples in the order Elasticsearch returned them.
 * @throws Error if no hybrid retrieval strategy is configured on this store.
 */
private async hybridSearchVectorWithScore(
  queryText: string,
  queryVector: number[],
  k: number,
  filter?: ElasticFilter
): Promise<[Document, number][]> {
  // Narrow once instead of sprinkling non-null assertions below.
  const { strategy } = this;
  if (!strategy) {
    throw new Error(
      "Hybrid search requires a HybridRetrievalStrategy to be configured."
    );
  }

  const metadataTerms = this.buildMetadataTerms(filter);
  const hasFilter =
    metadataTerms.must.length > 0 || metadataTerms.must_not.length > 0;
  // A top-level `query` is not allowed alongside `retriever`, so the metadata
  // filter is attached to each sub-retriever instead.
  const filterClause = hasFilter ? { filter: { bool: metadataTerms } } : {};

  const result = await this.client.search({
    index: this.indexName,
    size: k,
    retriever: {
      rrf: {
        retrievers: [
          {
            standard: {
              query: {
                match: {
                  [strategy.textField]: queryText,
                },
              },
              ...filterClause,
            },
          },
          {
            knn: {
              field: "embedding",
              query_vector: queryVector,
              k,
              num_candidates: this.candidates,
              ...filterClause,
            },
          },
        ],
        // Elasticsearch requires rank_window_size >= size; widen the window
        // when the caller asks for more hits than the configured window.
        rank_window_size: Math.max(strategy.rankWindowSize, k),
        rank_constant: strategy.rankConstant,
      },
    },
  });

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return result.hits.hits.map((hit: any) => [
    new Document({
      pageContent: hit._source.text,
      metadata: hit._source.metadata,
    }),
    hit._score,
  ]);
}

/**
* Method to delete documents from the Elasticsearch database.
* @param params Object containing the IDs of the documents to delete.
Expand Down
Loading