Skip to content

Commit 2d61c55

Browse files
Merge branch 'main' into pkar/msearch-flatworld-search-project-routing
2 parents 076d96e + c2286e1 commit 2d61c55

File tree

166 files changed

+8180
-741
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

166 files changed

+8180
-741
lines changed

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ dependencies {
4848
api(project(':x-pack:plugin:esql'))
4949
api(project(':x-pack:plugin:esql:compute'))
5050
api(project(':x-pack:plugin:mapper-exponential-histogram'))
51+
api(project(':x-pack:plugin:logsdb'))
5152
implementation project(path: ':libs:native')
5253
implementation project(path: ':libs:simdvec')
5354
implementation project(path: ':libs:exponential-histogram')
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.indices.common;
11+
12+
import org.elasticsearch.TransportVersion;
13+
import org.elasticsearch.cluster.ClusterModule;
14+
import org.elasticsearch.cluster.metadata.IndexMetadata;
15+
import org.elasticsearch.common.compress.CompressedXContent;
16+
import org.elasticsearch.common.logging.LogConfigurator;
17+
import org.elasticsearch.common.settings.IndexScopedSettings;
18+
import org.elasticsearch.common.settings.Setting;
19+
import org.elasticsearch.common.settings.Settings;
20+
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
21+
import org.elasticsearch.index.IndexSettings;
22+
import org.elasticsearch.index.IndexVersion;
23+
import org.elasticsearch.index.analysis.IndexAnalyzers;
24+
import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
25+
import org.elasticsearch.index.mapper.MapperMetrics;
26+
import org.elasticsearch.index.mapper.MapperRegistry;
27+
import org.elasticsearch.index.mapper.MapperService;
28+
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
29+
import org.elasticsearch.index.similarity.SimilarityService;
30+
import org.elasticsearch.indices.IndicesModule;
31+
import org.elasticsearch.script.Script;
32+
import org.elasticsearch.script.ScriptCompiler;
33+
import org.elasticsearch.script.ScriptContext;
34+
import org.elasticsearch.xcontent.NamedXContentRegistry;
35+
import org.elasticsearch.xcontent.XContentParserConfiguration;
36+
import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
37+
import org.openjdk.jmh.annotations.Benchmark;
38+
import org.openjdk.jmh.annotations.BenchmarkMode;
39+
import org.openjdk.jmh.annotations.Fork;
40+
import org.openjdk.jmh.annotations.Measurement;
41+
import org.openjdk.jmh.annotations.Mode;
42+
import org.openjdk.jmh.annotations.OutputTimeUnit;
43+
import org.openjdk.jmh.annotations.Param;
44+
import org.openjdk.jmh.annotations.Scope;
45+
import org.openjdk.jmh.annotations.Setup;
46+
import org.openjdk.jmh.annotations.State;
47+
import org.openjdk.jmh.annotations.Warmup;
48+
49+
import java.io.IOException;
50+
import java.util.ArrayList;
51+
import java.util.HashSet;
52+
import java.util.List;
53+
import java.util.Map;
54+
import java.util.Random;
55+
import java.util.Set;
56+
import java.util.concurrent.TimeUnit;
57+
58+
@Fork(value = 1)
59+
@Warmup(iterations = 2)
60+
@Measurement(iterations = 5)
61+
@BenchmarkMode(Mode.AverageTime)
62+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
63+
@State(Scope.Benchmark)
64+
public class MappingParsingBenchmark {
65+
static {
66+
// For Elasticsearch900Lucene101Codec:
67+
LogConfigurator.loadLog4jPlugins();
68+
LogConfigurator.configureESLogging();
69+
LogConfigurator.setNodeName("test");
70+
}
71+
72+
private static final String MAPPING = """
73+
{
74+
"_doc": {
75+
"dynamic": false,
76+
"properties": {
77+
"@timestamp": {
78+
"type": "date"
79+
},
80+
"host": {
81+
"type": "object",
82+
"properties": {
83+
"name": {
84+
"type": "keyword"
85+
}
86+
}
87+
},
88+
"message": {
89+
"type": "pattern_text"
90+
}
91+
}
92+
}
93+
}
94+
\s""";
95+
96+
@Param("1024")
97+
private int numIndices;
98+
99+
private List<MapperService> mapperServices;
100+
private CompressedXContent compressedMapping;
101+
102+
private Random random = new Random();
103+
private static final String CHARS = "abcdefghijklmnopqrstuvwxyz1234567890";
104+
105+
private String randomIndexName() {
106+
StringBuilder b = new StringBuilder();
107+
for (int i = 0; i < 10; i++) {
108+
b.append(CHARS.charAt(random.nextInt(CHARS.length())));
109+
}
110+
return b.toString();
111+
}
112+
113+
@Setup
114+
public void setUp() throws IOException {
115+
Settings settings = Settings.builder()
116+
.put("index.number_of_replicas", 0)
117+
.put("index.number_of_shards", 1)
118+
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
119+
.put("index.mode", "logsdb")
120+
.put("index.logsdb.sort_on_host_name", true)
121+
.put("index.logsdb.sort_on_message_template", true)
122+
.build();
123+
124+
LogsDBPlugin logsDBPlugin = new LogsDBPlugin(settings);
125+
126+
Set<Setting<?>> definedSettings = new HashSet<>(IndexScopedSettings.BUILT_IN_INDEX_SETTINGS);
127+
definedSettings.addAll(logsDBPlugin.getSettings().stream().filter(Setting::hasIndexScope).toList());
128+
IndexScopedSettings indexScopedSettings = new IndexScopedSettings(Settings.EMPTY, definedSettings);
129+
130+
mapperServices = new ArrayList<>(numIndices);
131+
for (int i = 0; i < numIndices; i++) {
132+
IndexMetadata meta = IndexMetadata.builder(randomIndexName()).settings(settings).build();
133+
IndexSettings indexSettings = new IndexSettings(meta, settings, indexScopedSettings);
134+
MapperRegistry mapperRegistry = new IndicesModule(List.of(logsDBPlugin)).getMapperRegistry();
135+
SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
136+
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, BitsetFilterCache.Listener.NOOP);
137+
MapperService mapperService = new MapperService(
138+
() -> TransportVersion.current(),
139+
indexSettings,
140+
IndexAnalyzers.of(Map.of()),
141+
XContentParserConfiguration.EMPTY.withRegistry(new NamedXContentRegistry(ClusterModule.getNamedXWriteables()))
142+
.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE),
143+
similarityService,
144+
mapperRegistry,
145+
() -> {
146+
throw new UnsupportedOperationException();
147+
},
148+
new ProvidedIdFieldMapper(() -> true),
149+
new ScriptCompiler() {
150+
@Override
151+
public <T> T compile(Script script, ScriptContext<T> scriptContext) {
152+
throw new UnsupportedOperationException();
153+
}
154+
},
155+
bitsetFilterCache::getBitSetProducer,
156+
MapperMetrics.NOOP
157+
);
158+
159+
mapperServices.add(mapperService);
160+
}
161+
162+
compressedMapping = new CompressedXContent(MAPPING);
163+
}
164+
165+
@Benchmark
166+
public void mappingParsingBenchmark() {
167+
for (MapperService service : mapperServices) {
168+
service.merge("_doc", compressedMapping, MapperService.MergeReason.MAPPING_UPDATE);
169+
}
170+
}
171+
}

docs/changelog/132388.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132388
2+
summary: Added NVIDIA support to Inference Plugin
3+
area: Machine Learning
4+
type: enhancement
5+
issues: []

docs/changelog/138803.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138803
2+
summary: Enable CPS
3+
area: SQL
4+
type: enhancement
5+
issues: []

docs/changelog/139061.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 139061
2+
summary: "Disallow index types updates to bbq_disk, revert"
3+
area: Vector Search
4+
type: bug
5+
issues: []

docs/reference/elasticsearch/index-settings/bbq.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ PUT bbq_flat-index
136136
```{applies_to}
137137
stack: ga 9.2
138138
```
139+
:::{note}
140+
This feature requires an [Enterprise subscription](https://www.elastic.co/subscriptions).
141+
:::
139142

140143
When you set a dense vector field’s `index_options` parameter to `type: bbq_disk`, {{es}} uses the DiskBBQ algorithm, a disk-based alternative to HNSW for [kNN search](https://www.elastic.co/docs/solutions/search/vector/knn) on compressed vectors. DiskBBQ stores the vector data on disk instead of in memory, lowering RAM requirements and reducing the overall cost of vector storage and search.
141144

@@ -265,4 +268,4 @@ You can change oversampling from the default 3× to another value. Refer to [Ove
265268
- [Better Binary Quantization (BBQ) in Lucene and {{es}}](https://www.elastic.co/search-labs/blog/better-binary-quantization-lucene-elasticsearch) - Learn how BBQ works, its benefits, and how it reduces memory usage while preserving search accuracy.
266269
- [Introducing a new vector storage format: DiskBBQ](https://www.elastic.co/search-labs/blog/diskbbq-elasticsearch-introduction) - Learn how DiskBBQ improves vector search in low-memory environments and compares to HNSW in speed and cost-effectiveness.
267270
- [Dense vector field type](https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/dense-vector) - Find code examples for using `bbq_hnsw` `index_type`.
268-
- [kNN search](https://www.elastic.co/docs/solutions/search/vector/knn) - Learn about the search algorithm that BBQ works with.
271+
- [kNN search](https://www.elastic.co/docs/solutions/search/vector/knn) - Learn about the search algorithm that BBQ works with.

docs/reference/elasticsearch/mapping-reference/dense-vector.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ $$$dense-vector-index-options$$$
362362
* `int8_flat` - This utilizes a brute-force search algorithm in addition to automatic scalar quantization. Only supports `element_type` of `float`.
363363
* `int4_flat` - This utilizes a brute-force search algorithm in addition to automatic half-byte scalar quantization. Only supports `element_type` of `float`.
364364
* `bbq_flat` - This utilizes a brute-force search algorithm in addition to automatic binary quantization. Only supports `element_type` of `float`.
365-
* {applies_to}`stack: ga 9.2` `bbq_disk` - This utilizes a variant of [k-means clustering algorithm](https://en.wikipedia.org/wiki/K-means_clustering) in addition to automatic binary quantization to partition vectors and search subspaces rather than an entire graph structure as in with HNSW. Only supports `element_type` of `float`. This combines the benefits of BBQ quantization with partitioning to further reduces the required memory overhead when compared with HNSW and can effectively be run at the smallest possible RAM and heap sizes when HNSW would otherwise cause swapping and grind to a halt. DiskBBQ largely scales linearly with the total RAM. And search performance is enhanced at scale as a subset of the total vector space is loaded.
365+
* {applies_to}`stack: ga 9.2` `bbq_disk` - This utilizes a variant of [k-means clustering algorithm](https://en.wikipedia.org/wiki/K-means_clustering) in addition to automatic binary quantization to partition vectors and search subspaces rather than an entire graph structure as with HNSW. Only supports `element_type` of `float`. This combines the benefits of BBQ quantization with partitioning to further reduce the required memory overhead when compared with HNSW and can effectively be run at the smallest possible RAM and heap sizes when HNSW would otherwise cause swapping and grind to a halt. DiskBBQ largely scales linearly with the total RAM. And search performance is enhanced at scale as a subset of the total vector space is loaded. This requires an [Enterprise subscription](https://www.elastic.co/subscriptions).
366366

367367
`m`
368368
: (Optional, integer) The number of neighbors each node will be connected to in the HNSW graph. Defaults to `16`. Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.

docs/reference/query-languages/esql/_snippets/functions/description/clamp.md

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/description/clamp_max.md

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/description/clamp_min.md

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)