Skip to content

Commit 73246de

Browse files
committed
docs and examples
1 parent c28160b commit 73246de

File tree

3 files changed

+180
-29
lines changed

3 files changed

+180
-29
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import { Client, ClientOptions } from "@elastic/elasticsearch";
2+
import { OpenAIEmbeddings } from "@langchain/openai";
3+
import {
4+
ElasticClientArgs,
5+
ElasticVectorSearch,
6+
HybridRetrievalStrategy,
7+
} from "@langchain/community/vectorstores/elasticsearch";
8+
import { Document } from "@langchain/core/documents";
9+
10+
/**
11+
* Demonstrates hybrid search with Elasticsearch, combining:
12+
* - Vector (semantic) search using embeddings
13+
* - BM25 (lexical) full-text search
14+
* - Reciprocal Rank Fusion (RRF) for result merging
15+
*
16+
* Requirements:
17+
* - Elasticsearch 8.9+ (for RRF support)
18+
* - Run: docker-compose up -d --build (in elasticsearch directory)
19+
* - Set OPENAI_API_KEY (used by OpenAIEmbeddings), ELASTIC_URL, and ELASTIC_API_KEY (or ELASTIC_USERNAME/ELASTIC_PASSWORD)
20+
*/
21+
export async function run() {
  // Configure Elasticsearch client. Falls back to a local node when
  // ELASTIC_URL is not set.
  const config: ClientOptions = {
    node: process.env.ELASTIC_URL ?? "http://127.0.0.1:9200",
  };
  // Prefer API-key auth; otherwise use basic auth when both credentials are
  // present. With neither, the client connects unauthenticated.
  if (process.env.ELASTIC_API_KEY) {
    config.auth = {
      apiKey: process.env.ELASTIC_API_KEY,
    };
  } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) {
    config.auth = {
      username: process.env.ELASTIC_USERNAME,
      password: process.env.ELASTIC_PASSWORD,
    };
  }

  const embeddings = new OpenAIEmbeddings();

  // Create vector store with hybrid search strategy
  const clientArgs: ElasticClientArgs = {
    client: new Client(config),
    indexName: process.env.ELASTIC_INDEX ?? "test_hybrid_search",
    strategy: new HybridRetrievalStrategy({
      rankWindowSize: 100, // Number of documents to consider for RRF
      rankConstant: 60, // RRF rank constant; higher values give lower-ranked documents more influence
      textField: "text", // Field to use for BM25 search
    }),
  };

  const vectorStore = new ElasticVectorSearch(embeddings, clientArgs);

  // Clean up any existing data
  await vectorStore.deleteIfExists();

  // Everything after this point may create or use the index, so the cleanup
  // lives in `finally`: a failed insert or search must not leave the demo
  // index behind.
  try {
    // Add sample documents
    const docs = [
      new Document({
        pageContent: "Running helps build cardiovascular endurance and strengthens leg muscles.",
        metadata: { category: "fitness", topic: "running" },
      }),
      new Document({
        pageContent: "Marathon training requires consistent mileage and proper recovery.",
        metadata: { category: "fitness", topic: "running" },
      }),
      new Document({
        pageContent: "Muscle soreness after exercise is caused by microscopic damage to muscle fibers.",
        metadata: { category: "health", topic: "recovery" },
      }),
      new Document({
        pageContent: "Stretching and foam rolling can help prevent post-workout muscle pain.",
        metadata: { category: "health", topic: "recovery" },
      }),
      new Document({
        pageContent: "Python is a popular programming language for data science and machine learning.",
        metadata: { category: "technology", topic: "programming" },
      }),
    ];

    console.log("Adding documents to Elasticsearch...");
    await vectorStore.addDocuments(docs);
    console.log("Documents added successfully!\n");

    // Example 1: Hybrid search combines semantic + keyword matching
    console.log("=== Example 1: Hybrid Search ===");
    const query1 = "How to avoid muscle soreness while running?";
    console.log(`Query: "${query1}"\n`);
    printResults(await vectorStore.similaritySearchWithScore(query1, 3));

    // Example 2: Semantic search works well for conceptual queries
    console.log("\n=== Example 2: Semantic Query ===");
    const query2 = "tips for preventing pain after workouts";
    console.log(`Query: "${query2}"\n`);
    printResults(await vectorStore.similaritySearchWithScore(query2, 2));

    // Example 3: With metadata filters
    console.log("\n=== Example 3: Hybrid Search with Filters ===");
    const query3 = "fitness advice";
    console.log(`Query: "${query3}"`);
    console.log(`Filter: category = "fitness"\n`);
    printResults(
      await vectorStore.similaritySearchWithScore(query3, 3, {
        category: "fitness",
      })
    );
  } finally {
    // Clean up
    console.log("\n=== Cleanup ===");
    await vectorStore.deleteIfExists();
    console.log("Index deleted.");
  }
}

/**
 * Prints each (document, score) pair as a numbered entry: the score with four
 * decimal places and the page content, followed by the JSON-encoded metadata.
 */
function printResults(results: [Document, number][]): void {
  results.forEach(([doc, score], i) => {
    console.log(`${i + 1}. [Score: ${score.toFixed(4)}] ${doc.pageContent}`);
    console.log(` Metadata: ${JSON.stringify(doc.metadata)}\n`);
  });
}
126+
127+
/**
128+
* For Elasticsearch 9.2+:
129+
* If you need to include vectors in the response, set includeSourceVectors:
130+
*
131+
* strategy: new HybridRetrievalStrategy({
132+
* includeSourceVectors: true,
133+
* rankWindowSize: 100,
134+
* rankConstant: 60,
135+
* })
136+
*
137+
* Note: This is only needed if you're on ES 9.2+ and want vector data
138+
* in search responses. ES 9.2+ excludes vectors by default for performance.
139+
*/
140+

libs/langchain-community/src/vectorstores/elasticsearch.ts

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,14 @@ export interface HybridRetrievalStrategyConfig {
3333
rankConstant?: number;
3434
textField?: string;
3535
/**
36-
* For Elasticsearch 9.2, set to `false` to include vectors in responses.
36+
* Include source vectors in search responses.
37+
*
38+
* Elasticsearch 9.2+ excludes vectors from `_source` by default.
39+
* Set to `true` to include vectors in responses for ES 9.2+.
40+
*
41+
* Note: ES < 8.19 does not support this parameter.
3742
*/
38-
excludeSourceVectors?: boolean;
43+
includeSourceVectors?: boolean;
3944
}
4045

4146
/**
@@ -45,13 +50,13 @@ export class HybridRetrievalStrategy {
4550
public readonly rankWindowSize: number;
4651
public readonly rankConstant: number;
4752
public readonly textField: string;
48-
public readonly excludeSourceVectors?: boolean;
53+
public readonly includeSourceVectors?: boolean;
4954

5055
constructor(config: HybridRetrievalStrategyConfig = {}) {
5156
this.rankWindowSize = config.rankWindowSize ?? 100;
5257
this.rankConstant = config.rankConstant ?? 60;
5358
this.textField = config.textField ?? "text";
54-
this.excludeSourceVectors = config.excludeSourceVectors;
59+
this.includeSourceVectors = config.includeSourceVectors;
5560
}
5661
}
5762

@@ -83,10 +88,23 @@ type ElasticMetadataTerms = {
8388
};
8489

8590
/**
86-
* Class for interacting with an Elasticsearch database. It extends the
87-
* VectorStore base class and provides methods for adding documents and
88-
* vectors to the Elasticsearch database, performing similarity searches,
89-
* deleting documents, and more.
91+
* Elasticsearch vector store supporting vector and hybrid search.
92+
*
93+
* Hybrid search combines kNN vector search with BM25 full-text search
94+
* using RRF. Enable by passing a `HybridRetrievalStrategy` to the constructor.
95+
*
96+
* @example
97+
* ```typescript
98+
* // Vector search (default)
99+
* const vectorStore = new ElasticVectorSearch(embeddings, { client, indexName });
100+
*
101+
* // Hybrid search
102+
* const hybridStore = new ElasticVectorSearch(embeddings, {
103+
* client,
104+
* indexName,
105+
* strategy: new HybridRetrievalStrategy()
106+
* });
107+
* ```
90108
*/
91109
export class ElasticVectorSearch extends VectorStore {
92110
declare FilterType: ElasticFilter;
@@ -239,6 +257,9 @@ export class ElasticVectorSearch extends VectorStore {
239257
k,
240258
num_candidates: this.candidates,
241259
},
260+
...(this.strategy?.includeSourceVectors === true && {
261+
_source: { includes: ["*"] },
262+
}),
242263
});
243264

244265
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -291,6 +312,9 @@ export class ElasticVectorSearch extends VectorStore {
291312
},
292313
},
293314
...(filterClauses && { query: filterClauses }),
315+
...(this.strategy?.includeSourceVectors === true && {
316+
_source: { includes: ["*"] },
317+
}),
294318
});
295319

296320
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -427,12 +451,6 @@ export class ElasticVectorSearch extends VectorStore {
427451
},
428452
};
429453

430-
if (this.strategy?.excludeSourceVectors !== undefined) {
431-
request.settings = {
432-
"index.mapping.exclude_source_vectors": this.strategy.excludeSourceVectors,
433-
};
434-
}
435-
436454
const indexExists = await this.doesIndexExist();
437455
if (indexExists) return;
438456

libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -423,37 +423,30 @@ describe("ElasticVectorSearch - ES 9.x Compatibility", () => {
423423
embeddings = new OpenAIEmbeddings();
424424
});
425425

426-
test.skip("Hybrid search with excludeSourceVectors set to false for ES 9.x", async () => {
427-
const indexName = "test_es9_exclude_false";
426+
test.skip("Hybrid search with includeSourceVectors set to true for ES 9.2+", async () => {
427+
const indexName = "test_es9_include_vectors";
428428
const store = new ElasticVectorSearch(embeddings, {
429429
client,
430430
indexName,
431431
strategy: new HybridRetrievalStrategy({
432-
excludeSourceVectors: false,
432+
includeSourceVectors: true,
433433
}),
434434
});
435435
await store.deleteIfExists();
436436

437437
await store.addDocuments([
438-
new Document({ pageContent: "Document for ES 9.x testing" }),
438+
new Document({ pageContent: "Document for ES 9.2+ testing" }),
439439
new Document({ pageContent: "Another document for compatibility" }),
440440
]);
441441

442442
const results = await store.similaritySearch("testing", 2);
443443

444444
expect(results).toHaveLength(2);
445445
expect(results[0]).toBeInstanceOf(Document);
446-
447-
const indexSettings = await client.indices.getSettings({
448-
index: indexName,
449-
});
450-
expect(
451-
indexSettings[indexName].settings?.index?.mapping?.exclude_source_vectors
452-
).toBe("false");
453446
});
454447

455-
test.skip("Hybrid search with excludeSourceVectors undefined uses ES defaults", async () => {
456-
const indexName = "test_es_default_exclude";
448+
test.skip("Hybrid search with includeSourceVectors undefined uses ES defaults", async () => {
449+
const indexName = "test_es_default_include";
457450
const store = new ElasticVectorSearch(embeddings, {
458451
client,
459452
indexName,
@@ -471,13 +464,13 @@ describe("ElasticVectorSearch - ES 9.x Compatibility", () => {
471464
expect(results[0]).toBeInstanceOf(Document);
472465
});
473466

474-
test.skip("Pure vector search with excludeSourceVectors for ES 9.x", async () => {
467+
test.skip("Pure vector search with includeSourceVectors for ES 9.2+", async () => {
475468
const indexName = "test_es9_pure_vector";
476469
const store = new ElasticVectorSearch(embeddings, {
477470
client,
478471
indexName,
479472
strategy: new HybridRetrievalStrategy({
480-
excludeSourceVectors: false,
473+
includeSourceVectors: true,
481474
}),
482475
});
483476
await store.deleteIfExists();

0 commit comments

Comments
 (0)