Skip to content

Commit e4c1134

Browse files
authored
Merge branch 'main' into pr-benchmarks
2 parents d7b212b + 0daa931 commit e4c1134

File tree

26 files changed

+810
-163
lines changed

26 files changed

+810
-163
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.elasticsearch.common.logging.LogConfigurator;
12+
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
13+
import org.openjdk.jmh.annotations.Benchmark;
14+
import org.openjdk.jmh.annotations.BenchmarkMode;
15+
import org.openjdk.jmh.annotations.Fork;
16+
import org.openjdk.jmh.annotations.Measurement;
17+
import org.openjdk.jmh.annotations.Mode;
18+
import org.openjdk.jmh.annotations.OutputTimeUnit;
19+
import org.openjdk.jmh.annotations.Param;
20+
import org.openjdk.jmh.annotations.Scope;
21+
import org.openjdk.jmh.annotations.Setup;
22+
import org.openjdk.jmh.annotations.State;
23+
import org.openjdk.jmh.annotations.Warmup;
24+
import org.openjdk.jmh.infra.Blackhole;
25+
26+
import java.io.IOException;
27+
import java.util.Random;
28+
import java.util.concurrent.TimeUnit;
29+
30+
@BenchmarkMode(Mode.Throughput)
31+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
32+
@State(Scope.Benchmark)
33+
// first iteration is complete garbage, so make sure we really warmup
34+
@Warmup(iterations = 4, time = 1)
35+
// real iterations. not useful to spend tons of time here, better to fork more
36+
@Measurement(iterations = 5, time = 1)
37+
// engage some noise reduction
38+
@Fork(value = 1)
39+
public class PackAsBinaryBenchmark {
40+
41+
static {
42+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
43+
}
44+
45+
@Param({ "384", "782", "1024" })
46+
int dims;
47+
48+
int length;
49+
50+
int numVectors = 1000;
51+
52+
int[][] qVectors;
53+
byte[] packed;
54+
55+
@Setup
56+
public void setup() throws IOException {
57+
Random random = new Random(123);
58+
59+
this.length = BQVectorUtils.discretize(dims, 64) / 8;
60+
this.packed = new byte[length];
61+
62+
qVectors = new int[numVectors][dims];
63+
for (int[] qVector : qVectors) {
64+
for (int i = 0; i < dims; i++) {
65+
qVector[i] = random.nextInt(2);
66+
}
67+
}
68+
}
69+
70+
@Benchmark
71+
public void packAsBinary(Blackhole bh) {
72+
for (int i = 0; i < numVectors; i++) {
73+
BQVectorUtils.packAsBinary(qVectors[i], packed);
74+
bh.consume(packed);
75+
}
76+
}
77+
78+
@Benchmark
79+
public void packAsBinaryLegacy(Blackhole bh) {
80+
for (int i = 0; i < numVectors; i++) {
81+
BQVectorUtils.packAsBinaryLegacy(qVectors[i], packed);
82+
bh.consume(packed);
83+
}
84+
}
85+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.elasticsearch.common.logging.LogConfigurator;
12+
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
13+
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
14+
import org.openjdk.jmh.annotations.Benchmark;
15+
import org.openjdk.jmh.annotations.BenchmarkMode;
16+
import org.openjdk.jmh.annotations.Fork;
17+
import org.openjdk.jmh.annotations.Measurement;
18+
import org.openjdk.jmh.annotations.Mode;
19+
import org.openjdk.jmh.annotations.OutputTimeUnit;
20+
import org.openjdk.jmh.annotations.Param;
21+
import org.openjdk.jmh.annotations.Scope;
22+
import org.openjdk.jmh.annotations.Setup;
23+
import org.openjdk.jmh.annotations.State;
24+
import org.openjdk.jmh.annotations.Warmup;
25+
import org.openjdk.jmh.infra.Blackhole;
26+
27+
import java.io.IOException;
28+
import java.util.Random;
29+
import java.util.concurrent.TimeUnit;
30+
31+
@BenchmarkMode(Mode.Throughput)
32+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
33+
@State(Scope.Benchmark)
34+
// first iteration is complete garbage, so make sure we really warmup
35+
@Warmup(iterations = 4, time = 1)
36+
// real iterations. not useful to spend tons of time here, better to fork more
37+
@Measurement(iterations = 5, time = 1)
38+
// engage some noise reduction
39+
@Fork(value = 1)
40+
public class TransposeHalfByteBenchmark {
41+
42+
static {
43+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
44+
}
45+
46+
@Param({ "384", "782", "1024" })
47+
int dims;
48+
49+
int length;
50+
51+
int numVectors = 1000;
52+
53+
int[][] qVectors;
54+
byte[] packed;
55+
56+
@Setup
57+
public void setup() throws IOException {
58+
Random random = new Random(123);
59+
60+
this.length = 4 * BQVectorUtils.discretize(dims, 64) / 8;
61+
this.packed = new byte[length];
62+
63+
qVectors = new int[numVectors][dims];
64+
for (int[] qVector : qVectors) {
65+
for (int i = 0; i < dims; i++) {
66+
qVector[i] = random.nextInt(16);
67+
}
68+
}
69+
}
70+
71+
@Benchmark
72+
public void transposeHalfByte(Blackhole bh) {
73+
for (int i = 0; i < numVectors; i++) {
74+
BQSpaceUtils.transposeHalfByte(qVectors[i], packed);
75+
bh.consume(packed);
76+
}
77+
}
78+
79+
@Benchmark
80+
public void transposeHalfByteLegacy(Blackhole bh) {
81+
for (int i = 0; i < numVectors; i++) {
82+
BQSpaceUtils.transposeHalfByteLegacy(qVectors[i], packed);
83+
bh.consume(packed);
84+
}
85+
}
86+
}

docs/changelog/132064.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132064
2+
summary: Only Allow Enabling Streams If No Conflicting Indices Exist
3+
area: Data streams
4+
type: enhancement
5+
issues: []

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ PUT my-index-000003
107107
```
108108

109109
### Using ELSER on EIS
110-
111110
```{applies_to}
112111
stack: preview 9.1
113112
serverless: preview
@@ -223,6 +222,10 @@ generated from it. When querying, the individual passages will be automatically
223222
searched for each document, and the most relevant passage will be used to
224223
compute a score.
225224

225+
Chunks are stored as start and end character offsets rather than as separate
226+
text strings. These offsets point to the exact location of each chunk within the
227+
original input text.
228+
226229
For more details on chunking and how to configure chunking settings,
227230
see [Configuring chunking](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference)
228231
in the Inference API documentation.
@@ -238,7 +241,8 @@ stack: ga 9.1
238241

239242
You can pre-chunk the input by sending it to Elasticsearch as an array of
240243
strings.
241-
Example:
244+
245+
For example:
242246

243247
```console
244248
PUT test-index
@@ -540,7 +544,6 @@ POST test-index/_search
540544
This will return verbose chunked embeddings content that is used to perform
541545
semantic search for `semantic_text` fields.
542546

543-
544547
## Limitations [limitations]
545548

546549
`semantic_text` field types have the following limitations:
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.reindex;
11+
12+
import org.elasticsearch.ElasticsearchException;
13+
import org.elasticsearch.action.ActionFuture;
14+
import org.elasticsearch.action.ActionListener;
15+
import org.elasticsearch.action.ActionRequest;
16+
import org.elasticsearch.action.ActionResponse;
17+
import org.elasticsearch.action.bulk.BulkItemResponse;
18+
import org.elasticsearch.action.support.ActionFilter;
19+
import org.elasticsearch.action.support.ActionFilterChain;
20+
import org.elasticsearch.plugins.ActionPlugin;
21+
import org.elasticsearch.plugins.Plugin;
22+
import org.elasticsearch.reindex.ReindexPlugin;
23+
import org.elasticsearch.tasks.Task;
24+
import org.elasticsearch.test.ESIntegTestCase;
25+
import org.junit.Before;
26+
27+
import java.util.Arrays;
28+
import java.util.Collection;
29+
import java.util.List;
30+
import java.util.concurrent.atomic.AtomicBoolean;
31+
import java.util.concurrent.atomic.AtomicInteger;
32+
import java.util.stream.Collectors;
33+
import java.util.stream.IntStream;
34+
35+
import static java.util.Collections.singletonList;
36+
import static org.elasticsearch.action.DocWriteRequest.OpType.CREATE;
37+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFutureThrows;
38+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
39+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
40+
import static org.hamcrest.Matchers.containsString;
41+
import static org.hamcrest.Matchers.empty;
42+
import static org.hamcrest.Matchers.not;
43+
44+
/**
 * Tests retrying a failed reindex operation: a reindex is failed partway
 * through via an injected action filter, then re-run first with
 * abort-on-version-conflict (expecting conflicts against the partially
 * populated destination) and finally with conflicts ignored (expecting
 * completion).
 */
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
public class RetryFailedReindexIT extends ESIntegTestCase {
    private static final String INDEX = "source-index";
    private static final String DEST_INDEX = "dest-index";
    // Total docs indexed into the source index.
    private static final int NUM_DOCS = 100;
    // Bulk writes beyond this count fail while the filter is enabled, leaving
    // the destination index partially populated.
    private static final int NUM_PARTIAL_DOCS = 70;
    // Static because the plugin's filter instances live on the cluster nodes,
    // not on the test instance; toggled per test via reset().
    private static final AtomicBoolean FILTER_ENABLED = new AtomicBoolean(false);
    // Counts bulk requests seen by the filter (one doc per bulk, since the
    // reindex scroll size is 1 — see reindex() below).
    private static final AtomicInteger DOC_COUNT = new AtomicInteger(0);

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(ReindexPlugin.class, TestPlugin.class);
    }

    // Reset shared static state so tests are independent.
    @Before
    public void reset() {
        FILTER_ENABLED.set(false);
        DOC_COUNT.set(0);
    }

    public void testRetryFailedReindex() throws Exception {
        createIndex(INDEX);
        // Seed NUM_DOCS docs with ids "0".."99" and a single field "n".
        indexRandom(
            true,
            false,
            true,
            IntStream.range(0, NUM_DOCS)
                .mapToObj(i -> prepareIndex(INDEX).setId(Integer.toString(i)).setSource("n", Integer.toString(i)))
                .collect(Collectors.toList())
        );
        assertHitCount(prepareSearch(INDEX).setSize(0).setTrackTotalHits(true), NUM_DOCS);

        // Fail reindex and end up in partial state
        FILTER_ENABLED.set(true);
        assertFutureThrows(reindex(true), TestException.class);
        FILTER_ENABLED.set(false);

        // Run into conflicts with partial destination index: op type CREATE
        // makes already-copied docs fail with version conflicts.
        assertResponse(reindex(true), res -> {
            assertThat(res.getBulkFailures(), not(empty()));
            for (BulkItemResponse.Failure failure : res.getBulkFailures()) {
                assertThat(failure.getMessage(), containsString("VersionConflictEngineException: ["));
            }
        });

        // Bypass conflicts and complete reindex
        assertResponse(reindex(false), res -> { assertThat(res.getBulkFailures(), empty()); });
        assertBusy(() -> { assertHitCount(prepareSearch(DEST_INDEX).setSize(0).setTrackTotalHits(true), NUM_DOCS); });
    }

    /**
     * Starts a reindex from {@link #INDEX} to {@link #DEST_INDEX} and returns
     * its future.
     *
     * @param abortOnVersionConflict whether a version conflict aborts the
     *        whole operation or is merely recorded as a bulk failure
     */
    private ActionFuture<BulkByScrollResponse> reindex(boolean abortOnVersionConflict) {
        ReindexRequestBuilder builder = new ReindexRequestBuilder(internalCluster().client());
        builder.source(INDEX).destination(DEST_INDEX).abortOnVersionConflict(abortOnVersionConflict);
        // Scroll size 1 => one doc per bulk request, so the filter's counter
        // tracks documents and the failure lands exactly mid-reindex.
        builder.source().setSize(1);
        // CREATE (not INDEX) so re-copying an existing doc raises a version
        // conflict instead of silently overwriting it.
        builder.destination().setOpType(CREATE);
        return builder.execute();
    }

    // Distinct exception type so the test can assert the injected failure
    // specifically rather than any generic indexing error.
    private static class TestException extends ElasticsearchException {
        TestException() {
            super("Injected index failure");
        }
    }

    /**
     * Installs an action filter that fails bulk-write requests once more than
     * {@link #NUM_PARTIAL_DOCS} have been seen, while {@link #FILTER_ENABLED}
     * is set.
     */
    public static class TestPlugin extends Plugin implements ActionPlugin {
        @Override
        public List<ActionFilter> getActionFilters() {
            return singletonList(new ActionFilter() {
                @Override
                public int order() {
                    // Run before all other filters.
                    return Integer.MIN_VALUE;
                }

                @Override
                public <Request extends ActionRequest, Response extends ActionResponse> void apply(
                    Task task,
                    String action,
                    Request request,
                    ActionListener<Response> listener,
                    ActionFilterChain<Request, Response> chain
                ) {
                    // Note: incrementAndGet() only runs when the earlier
                    // conditions hold (&& short-circuits), so only enabled
                    // bulk-write requests are counted.
                    if (FILTER_ENABLED.get()
                        && action.equals("indices:data/write/bulk")
                        && DOC_COUNT.incrementAndGet() > NUM_PARTIAL_DOCS) {
                        listener.onFailure(new TestException());
                    } else {
                        chain.proceed(task, action, request, listener);
                    }
                }

            });
        }
    }
}

modules/streams/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ esplugin {
2020

2121
restResources {
2222
restApi {
23-
include '_common', 'streams', "bulk", "index", "ingest", "indices", "delete_by_query", "search"
23+
include '_common', 'streams', 'bulk', 'index', 'ingest', 'indices', 'delete_by_query', 'search'
2424
}
2525
}
2626

0 commit comments

Comments
 (0)