Skip to content

Commit 80537a4

Browse files
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-chat-completion
2 parents 136416d + 72b5c01 commit 80537a4

File tree

43 files changed

+497
-275
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+497
-275
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/vector/OptimizedScalarQuantizerBenchmark.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ public class OptimizedScalarQuantizerBenchmark {
4343

4444
float[] vector;
4545
float[] centroid;
46-
byte[] destination;
46+
byte[] legacyDestination;
47+
int[] destination;
4748

4849
@Param({ "1", "4", "7" })
4950
byte bits;
@@ -54,7 +55,8 @@ public class OptimizedScalarQuantizerBenchmark {
5455
public void init() {
5556
ThreadLocalRandom random = ThreadLocalRandom.current();
5657
// random byte arrays for binary methods
57-
destination = new byte[dims];
58+
legacyDestination = new byte[dims];
59+
destination = new int[dims];
5860
vector = new float[dims];
5961
centroid = new float[dims];
6062
for (int i = 0; i < dims; ++i) {
@@ -65,13 +67,20 @@ public void init() {
6567

6668
@Benchmark
6769
public byte[] scalar() {
68-
osq.scalarQuantize(vector, destination, bits, centroid);
69-
return destination;
70+
osq.legacyScalarQuantize(vector, legacyDestination, bits, centroid);
71+
return legacyDestination;
72+
}
73+
74+
@Benchmark
75+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
76+
public byte[] legacyVector() {
77+
osq.legacyScalarQuantize(vector, legacyDestination, bits, centroid);
78+
return legacyDestination;
7079
}
7180

7281
@Benchmark
7382
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
74-
public byte[] vector() {
83+
public int[] vector() {
7584
osq.scalarQuantize(vector, destination, bits, centroid);
7685
return destination;
7786
}

docs/reference/elasticsearch/configuration-reference/thread-pool-settings.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@ $$$search-throttled$$$`search_throttled`
5959
`flush`
6060
: For [flush](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-flush) and [translog](/reference/elasticsearch/index-settings/translog.md) `fsync` operations. Thread pool type is `scaling` with a keep-alive of `5m` and a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`.
6161

62+
`merge`
63+
: For [merge](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-merge.html) operations of all the shards on the node. Thread pool type is `scaling` with a keep-alive of `5m` and a default maximum size of [`# of allocated processors`](#node.processors).
64+
6265
`force_merge`
63-
: For [force merge](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) operations. Thread pool type is `fixed` with a size of `max(1, (`[`# of allocated processors`](#node.processors)`) / 8)` and an unbounded queue size.
66+
: For waiting on blocking [force merge](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) operations. Thread pool type is `fixed` with a size of `max(1, (`[`# of allocated processors`](#node.processors)`) / 8)` and an unbounded queue size.
6467

6568
`management`
6669
: For cluster management. Thread pool type is `scaling` with a keep-alive of `5m` and a default maximum size of `5`.

docs/reference/elasticsearch/index-settings/merge.md

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,32 @@ The merge process uses auto-throttling to balance the use of hardware resources
1818

1919
## Merge scheduling [merge-scheduling]
2020

21-
The merge scheduler (ConcurrentMergeScheduler) controls the execution of merge operations when they are needed. Merges run in separate threads, and when the maximum number of threads is reached, further merges will wait until a merge thread becomes available.
21+
The merge scheduler controls the execution of merge operations when they are needed.
22+
Merges run on the dedicated `merge` thread pool.
23+
Smaller merges are prioritized over larger ones, across all shards on the node.
24+
Merges are disk IO throttled so that bursts, while merging activity is otherwise low, are smoothed out in order to not impact indexing throughput.
25+
There is no limit on the number of merges that can be enqueued for execution on the thread pool.
26+
However, beyond a certain per-shard limit, after merging is completely disk IO un-throttled, indexing for the shard will itself be throttled until merging catches up.
2227

23-
The merge scheduler supports the following *dynamic* setting:
28+
The available disk space is periodically monitored, such that no new merge tasks are scheduled for execution when the available disk space is low.
29+
This is in order to prevent that the temporary disk space, which is required while merges are executed, completely fills up the disk space on the node.
30+
31+
The merge scheduler supports the following *dynamic* settings:
2432

2533
`index.merge.scheduler.max_thread_count`
26-
: The maximum number of threads on a single shard that may be merging at once. Defaults to `Math.max(1, Math.min(4, <<node.processors, node.processors>> / 2))` which works well for a good solid-state-disk (SSD). If your index is on spinning platter drives instead, decrease this to 1.
34+
: The maximum number of threads on a **single** shard that may be merging at once. Defaults to `Math.max(1, Math.min(4, <<node.processors, node.processors>> / 2))` which works well for a good solid-state-disk (SSD). If your index is on spinning platter drives instead, decrease this to 1.
35+
36+
`indices.merge.disk.check_interval`
37+
: The time interval for checking the available disk space. Defaults to `5s`.
38+
39+
`indices.merge.disk.watermark.high`
40+
: Controls the disk usage watermark, which defaults to `95%`, beyond which no merge tasks can start execution.
41+
The disk usage tally includes the estimated temporary disk space still required by all the currently executing merge tasks.
42+
Any merge task scheduled *before* the limit is reached continues execution, even if the limit is exceeded while executing
43+
(merge tasks are not aborted).
44+
45+
`indices.merge.disk.watermark.high.max_headroom`
46+
: Controls the max headroom for the merge disk usage watermark, in case it is specified as percentage or ratio values.
47+
Defaults to `100GB` when `indices.merge.disk.watermark.high` is not explicitly set.
48+
This caps the amount of free disk space before merge scheduling is blocked.
2749

docs/reference/query-languages/esql/images/functions/knn.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/reference/query-languages/esql/kibana/definition/functions/knn.json

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/kibana/docs/functions/knn.md

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,4 +258,25 @@ public static float soarDistance(float[] v1, float[] centroid, float[] originalR
258258
}
259259
return IMPL.soarDistance(v1, centroid, originalResidual, soarLambda, rnorm);
260260
}
261+
262+
/**
263+
* Optimized-scalar quantization of the provided vector to the provided destination array.
264+
*
265+
* @param vector the vector to quantize
266+
* @param destination the array to store the result
267+
* @param lowInterval the minimum value, lower values in the original array will be replaced by this value
268+
* @param upperInterval the maximum value, bigger values in the original array will be replaced by this value
269+
* @param bit the number of bits to use for quantization, must be between 1 and 8
270+
*
271+
* @return return the sum of all the elements of the resulting quantized vector.
272+
*/
273+
public static int quantizeVectorWithIntervals(float[] vector, int[] destination, float lowInterval, float upperInterval, byte bit) {
274+
if (vector.length > destination.length) {
275+
throw new IllegalArgumentException("vector dimensions differ: " + vector.length + "!=" + destination.length);
276+
}
277+
if (bit <= 0 || bit > Byte.SIZE) {
278+
throw new IllegalArgumentException("bit must be between 1 and 8, but was: " + bit);
279+
}
280+
return IMPL.quantizeVectorWithIntervals(vector, destination, lowInterval, upperInterval, bit);
281+
}
261282
}

libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,4 +269,18 @@ public static float ipFloatByteImpl(float[] q, byte[] d) {
269269
}
270270
return ret;
271271
}
272+
273+
@Override
274+
public int quantizeVectorWithIntervals(float[] vector, int[] destination, float lowInterval, float upperInterval, byte bits) {
275+
float nSteps = ((1 << bits) - 1);
276+
float step = (upperInterval - lowInterval) / nSteps;
277+
int sumQuery = 0;
278+
for (int h = 0; h < vector.length; h++) {
279+
float xi = Math.min(Math.max(vector[h], lowInterval), upperInterval);
280+
int assignment = Math.round((xi - lowInterval) / step);
281+
sumQuery += assignment;
282+
destination[h] = assignment;
283+
}
284+
return sumQuery;
285+
}
272286
}

libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,6 @@ public interface ESVectorUtilSupport {
3939

4040
float soarDistance(float[] v1, float[] centroid, float[] originalResidual, float soarLambda, float rnorm);
4141

42+
int quantizeVectorWithIntervals(float[] vector, int[] quantize, float lowInterval, float upperInterval, byte bit);
43+
4244
}

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,4 +791,32 @@ public static float ipFloatByteImpl(float[] q, byte[] d) {
791791

792792
return sum;
793793
}
794+
795+
@Override
796+
public int quantizeVectorWithIntervals(float[] vector, int[] destination, float lowInterval, float upperInterval, byte bits) {
797+
float nSteps = ((1 << bits) - 1);
798+
float step = (upperInterval - lowInterval) / nSteps;
799+
int sumQuery = 0;
800+
int i = 0;
801+
if (vector.length > 2 * FLOAT_SPECIES.length()) {
802+
int limit = FLOAT_SPECIES.loopBound(vector.length);
803+
FloatVector lowVec = FloatVector.broadcast(FLOAT_SPECIES, lowInterval);
804+
FloatVector upperVec = FloatVector.broadcast(FLOAT_SPECIES, upperInterval);
805+
FloatVector stepVec = FloatVector.broadcast(FLOAT_SPECIES, step);
806+
for (; i < limit; i += FLOAT_SPECIES.length()) {
807+
FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, vector, i);
808+
FloatVector xi = v.max(lowVec).min(upperVec); // clamp
809+
IntVector assignment = xi.sub(lowVec).div(stepVec).add(0.5f).convert(VectorOperators.F2I, 0).reinterpretAsInts(); // round
810+
sumQuery += assignment.reduceLanes(ADD);
811+
assignment.intoArray(destination, i);
812+
}
813+
}
814+
for (; i < vector.length; i++) {
815+
float xi = Math.min(Math.max(vector[i], lowInterval), upperInterval);
816+
int assignment = Math.round((xi - lowInterval) / step);
817+
sumQuery += assignment;
818+
destination[i] = assignment;
819+
}
820+
return sumQuery;
821+
}
794822
}

0 commit comments

Comments
 (0)