Skip to content

Commit 5361c0b

Browse files
authored
Merge branch 'main' into semantic_text_chunks_format
2 parents 188c92a + d90055e commit 5361c0b

File tree

52 files changed

+1135
-687
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+1135
-687
lines changed

docs/changelog/132408.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132408
2+
summary: Correct exception for missing nested path
3+
area: Search
4+
type: bug
5+
issues: []

docs/changelog/132414.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132414
2+
summary: Adjust date docvalue formatting to return 4xx instead of 5xx
3+
area: Search
4+
type: bug
5+
issues: []

docs/changelog/92568.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 92568
2+
summary: Support nested fields for term vectors API when using artificial documents
3+
area: Search
4+
type: enhancement
5+
issues:
6+
- 91902

docs/reference/query-languages/query-dsl/query-dsl-pinned-query.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ mapped_pages:
77

88
Promotes selected documents to rank higher than those matching a given query. This feature is typically used to guide searchers to curated documents that are promoted over and above any "organic" matches for a search. The promoted or "pinned" documents are identified using the document IDs stored in the [`_id`](/reference/elasticsearch/mapping-reference/mapping-id-field.md) field.
99

10+
::::{note}
11+
Pinned queries are designed to work only with relevance-based sorting. Using explicit `sort` criteria overrides the pinned document promotion.
12+
::::
13+
1014
## Example request [_example_request]
1115

1216
```console

libs/simdvec/src/main/java/org/elasticsearch/simdvec/ES91OSQVectorsScorer.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ public float score(
141141
*
142142
* <p>The results are stored in the provided scores array, and the maximum of those scores is returned.
143143
*/
144-
public void scoreBulk(
144+
public float scoreBulk(
145145
byte[] q,
146146
float queryLowerInterval,
147147
float queryUpperInterval,
@@ -158,6 +158,7 @@ public void scoreBulk(
158158
targetComponentSums[i] = Short.toUnsignedInt(in.readShort());
159159
}
160160
in.readFloats(additionalCorrections, 0, BULK_SIZE);
161+
float maxScore = Float.NEGATIVE_INFINITY;
161162
for (int i = 0; i < BULK_SIZE; i++) {
162163
scores[i] = score(
163164
queryLowerInterval,
@@ -172,6 +173,10 @@ public void scoreBulk(
172173
additionalCorrections[i],
173174
scores[i]
174175
);
176+
if (scores[i] > maxScore) {
177+
maxScore = scores[i];
178+
}
175179
}
180+
return maxScore;
176181
}
177182
}

libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,11 @@ public void calculateOSQGridPoints(float[] target, int[] quantize, int points, f
8080
float dbb = 0;
8181
float dax = 0;
8282
float dbx = 0;
83+
float invPmOnes = 1f / (points - 1f);
8384
for (int i = 0; i < target.length; ++i) {
8485
float v = target[i];
8586
float k = quantize[i];
86-
float s = k / (points - 1);
87+
float s = k * invPmOnes;
8788
float ms = 1f - s;
8889
daa = fma(ms, ms, daa);
8990
dab = fma(ms, s, dab);

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/MemorySegmentES91OSQVectorsScorer.java

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ private void quantizeScore256Bulk(byte[] q, int count, float[] scores) throws IO
352352
}
353353

354354
@Override
355-
public void scoreBulk(
355+
public float scoreBulk(
356356
byte[] q,
357357
float queryLowerInterval,
358358
float queryUpperInterval,
@@ -366,7 +366,7 @@ public void scoreBulk(
366366
// 128 / 8 == 16
367367
if (length >= 16 && PanamaESVectorUtilSupport.HAS_FAST_INTEGER_VECTORS) {
368368
if (PanamaESVectorUtilSupport.VECTOR_BITSIZE >= 256) {
369-
score256Bulk(
369+
return score256Bulk(
370370
q,
371371
queryLowerInterval,
372372
queryUpperInterval,
@@ -376,9 +376,8 @@ public void scoreBulk(
376376
centroidDp,
377377
scores
378378
);
379-
return;
380379
} else if (PanamaESVectorUtilSupport.VECTOR_BITSIZE == 128) {
381-
score128Bulk(
380+
return score128Bulk(
382381
q,
383382
queryLowerInterval,
384383
queryUpperInterval,
@@ -388,10 +387,9 @@ public void scoreBulk(
388387
centroidDp,
389388
scores
390389
);
391-
return;
392390
}
393391
}
394-
super.scoreBulk(
392+
return super.scoreBulk(
395393
q,
396394
queryLowerInterval,
397395
queryUpperInterval,
@@ -403,7 +401,7 @@ public void scoreBulk(
403401
);
404402
}
405403

406-
private void score128Bulk(
404+
private float score128Bulk(
407405
byte[] q,
408406
float queryLowerInterval,
409407
float queryUpperInterval,
@@ -420,6 +418,7 @@ private void score128Bulk(
420418
float ay = queryLowerInterval;
421419
float ly = (queryUpperInterval - ay) * FOUR_BIT_SCALE;
422420
float y1 = queryComponentSum;
421+
float maxScore = Float.NEGATIVE_INFINITY;
423422
for (; i < limit; i += FLOAT_SPECIES_128.length()) {
424423
var ax = FloatVector.fromMemorySegment(FLOAT_SPECIES_128, memorySegment, offset + i * Float.BYTES, ByteOrder.LITTLE_ENDIAN);
425424
var lx = FloatVector.fromMemorySegment(
@@ -453,6 +452,7 @@ private void score128Bulk(
453452
if (similarityFunction == EUCLIDEAN) {
454453
res = res.mul(-2).add(additionalCorrections).add(queryAdditionalCorrection).add(1f);
455454
res = FloatVector.broadcast(FLOAT_SPECIES_128, 1).div(res).max(0);
455+
maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
456456
res.intoArray(scores, i);
457457
} else {
458458
// For cosine and max inner product, we need to apply the additional correction, which is
@@ -463,17 +463,20 @@ private void score128Bulk(
463463
// not sure how to do it better
464464
for (int j = 0; j < FLOAT_SPECIES_128.length(); j++) {
465465
scores[i + j] = VectorUtil.scaleMaxInnerProductScore(scores[i + j]);
466+
maxScore = Math.max(maxScore, scores[i + j]);
466467
}
467468
} else {
468469
res = res.add(1f).mul(0.5f).max(0);
469470
res.intoArray(scores, i);
471+
maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
470472
}
471473
}
472474
}
473475
in.seek(offset + 14L * BULK_SIZE);
476+
return maxScore;
474477
}
475478

476-
private void score256Bulk(
479+
private float score256Bulk(
477480
byte[] q,
478481
float queryLowerInterval,
479482
float queryUpperInterval,
@@ -490,6 +493,7 @@ private void score256Bulk(
490493
float ay = queryLowerInterval;
491494
float ly = (queryUpperInterval - ay) * FOUR_BIT_SCALE;
492495
float y1 = queryComponentSum;
496+
float maxScore = Float.NEGATIVE_INFINITY;
493497
for (; i < limit; i += FLOAT_SPECIES_256.length()) {
494498
var ax = FloatVector.fromMemorySegment(FLOAT_SPECIES_256, memorySegment, offset + i * Float.BYTES, ByteOrder.LITTLE_ENDIAN);
495499
var lx = FloatVector.fromMemorySegment(
@@ -523,6 +527,7 @@ private void score256Bulk(
523527
if (similarityFunction == EUCLIDEAN) {
524528
res = res.mul(-2).add(additionalCorrections).add(queryAdditionalCorrection).add(1f);
525529
res = FloatVector.broadcast(FLOAT_SPECIES_256, 1).div(res).max(0);
530+
maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
526531
res.intoArray(scores, i);
527532
} else {
528533
// For cosine and max inner product, we need to apply the additional correction, which is
@@ -533,13 +538,16 @@ private void score256Bulk(
533538
// not sure how to do it better
534539
for (int j = 0; j < FLOAT_SPECIES_256.length(); j++) {
535540
scores[i + j] = VectorUtil.scaleMaxInnerProductScore(scores[i + j]);
541+
maxScore = Math.max(maxScore, scores[i + j]);
536542
}
537543
} else {
538544
res = res.add(1f).mul(0.5f).max(0);
545+
maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
539546
res.intoArray(scores, i);
540547
}
541548
}
542549
}
543550
in.seek(offset + 14L * BULK_SIZE);
551+
return maxScore;
544552
}
545553
}

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public void centerAndCalculateOSQStatsEuclidean(float[] vector, float[] centroid
132132
FloatVector centeredVec = v.sub(c);
133133
FloatVector deltaVec = centeredVec.sub(vecMeanVec);
134134
norm2Vec = fma(centeredVec, centeredVec, norm2Vec);
135-
vecMeanVec = vecMeanVec.add(deltaVec.div(count));
135+
vecMeanVec = vecMeanVec.add(deltaVec.mul(1f / count));
136136
FloatVector delta2Vec = centeredVec.sub(vecMeanVec);
137137
m2Vec = fma(deltaVec, delta2Vec, m2Vec);
138138
minVec = minVec.min(centeredVec);
@@ -214,7 +214,7 @@ public void centerAndCalculateOSQStatsDp(float[] vector, float[] centroid, float
214214
FloatVector centeredVec = v.sub(c);
215215
FloatVector deltaVec = centeredVec.sub(vecMeanVec);
216216
norm2Vec = fma(centeredVec, centeredVec, norm2Vec);
217-
vecMeanVec = vecMeanVec.add(deltaVec.div(count));
217+
vecMeanVec = vecMeanVec.add(deltaVec.mul(1f / count));
218218
FloatVector delta2Vec = centeredVec.sub(vecMeanVec);
219219
m2Vec = fma(deltaVec, delta2Vec, m2Vec);
220220
minVec = minVec.min(centeredVec);
@@ -278,6 +278,7 @@ public void calculateOSQGridPoints(float[] target, int[] quantize, int points, f
278278
float dbb = 0;
279279
float dax = 0;
280280
float dbx = 0;
281+
float invPmOnes = 1f / (points - 1f);
281282
// if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize
282283
if (target.length > 2 * FLOAT_SPECIES.length()) {
283284
FloatVector daaVec = FloatVector.zero(FLOAT_SPECIES);
@@ -286,11 +287,11 @@ public void calculateOSQGridPoints(float[] target, int[] quantize, int points, f
286287
FloatVector daxVec = FloatVector.zero(FLOAT_SPECIES);
287288
FloatVector dbxVec = FloatVector.zero(FLOAT_SPECIES);
288289
FloatVector ones = FloatVector.broadcast(FLOAT_SPECIES, 1f);
289-
FloatVector pmOnes = FloatVector.broadcast(FLOAT_SPECIES, points - 1f);
290+
FloatVector invPmOnesVec = FloatVector.broadcast(FLOAT_SPECIES, invPmOnes);
290291
for (; i < FLOAT_SPECIES.loopBound(target.length); i += FLOAT_SPECIES.length()) {
291292
FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, target, i);
292293
FloatVector oVec = IntVector.fromArray(INTEGER_SPECIES, quantize, i).convert(VectorOperators.I2F, 0).reinterpretAsFloats();
293-
FloatVector sVec = oVec.div(pmOnes);
294+
FloatVector sVec = oVec.mul(invPmOnesVec);
294295
FloatVector smVec = ones.sub(sVec);
295296
daaVec = fma(smVec, smVec, daaVec);
296297
dabVec = fma(smVec, sVec, dabVec);
@@ -307,7 +308,7 @@ public void calculateOSQGridPoints(float[] target, int[] quantize, int points, f
307308

308309
for (; i < target.length; i++) {
309310
float k = quantize[i];
310-
float s = k / (points - 1);
311+
float s = k * invPmOnes;
311312
float ms = 1f - s;
312313
daa = fma(ms, ms, daa);
313314
dab = fma(ms, s, dab);
@@ -798,25 +799,26 @@ public static float ipFloatByteImpl(float[] q, byte[] d) {
798799
@Override
799800
public int quantizeVectorWithIntervals(float[] vector, int[] destination, float lowInterval, float upperInterval, byte bits) {
800801
float nSteps = ((1 << bits) - 1);
801-
float step = (upperInterval - lowInterval) / nSteps;
802+
float invStep = nSteps / (upperInterval - lowInterval);
802803
int sumQuery = 0;
803804
int i = 0;
804805
if (vector.length > 2 * FLOAT_SPECIES.length()) {
805806
int limit = FLOAT_SPECIES.loopBound(vector.length);
806807
FloatVector lowVec = FloatVector.broadcast(FLOAT_SPECIES, lowInterval);
807808
FloatVector upperVec = FloatVector.broadcast(FLOAT_SPECIES, upperInterval);
808-
FloatVector stepVec = FloatVector.broadcast(FLOAT_SPECIES, step);
809+
FloatVector invStepVec = FloatVector.broadcast(FLOAT_SPECIES, invStep);
809810
for (; i < limit; i += FLOAT_SPECIES.length()) {
810811
FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, vector, i);
811812
FloatVector xi = v.max(lowVec).min(upperVec); // clamp
812-
IntVector assignment = xi.sub(lowVec).div(stepVec).add(0.5f).convert(VectorOperators.F2I, 0).reinterpretAsInts(); // round
813+
// round
814+
IntVector assignment = xi.sub(lowVec).mul(invStepVec).add(0.5f).convert(VectorOperators.F2I, 0).reinterpretAsInts();
813815
sumQuery += assignment.reduceLanes(ADD);
814816
assignment.intoArray(destination, i);
815817
}
816818
}
817819
for (; i < vector.length; i++) {
818820
float xi = Math.min(Math.max(vector[i], lowInterval), upperInterval);
819-
int assignment = Math.round((xi - lowInterval) / step);
821+
int assignment = Math.round((xi - lowInterval) * invStep);
820822
sumQuery += assignment;
821823
destination[i] = assignment;
822824
}

0 commit comments

Comments
 (0)