-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Allow zero for rescore_vector.oversample to indicate by-passing oversample and rescoring #125599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
b7fb14f
60846bb
6f7dfc0
9a6806f
2e15157
e73ef1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| pr: 125599 | ||
| summary: Allow zero for `rescore_vector.oversample` to indicate by-passing oversample | ||
| and rescoring | ||
| area: Vector Search | ||
| type: enhancement | ||
| issues: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,6 +115,8 @@ public static boolean isNotUnitVector(float magnitude) { | |
| public static final IndexVersion DEFAULT_TO_INT8 = DEFAULT_DENSE_VECTOR_TO_INT8_HNSW; | ||
| public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersions.V_8_9_0; | ||
| public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS; | ||
| public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = | ||
| IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS; | ||
|
|
||
| public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector"); | ||
|
|
||
|
|
@@ -1321,7 +1323,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti | |
| } | ||
| RescoreVector rescoreVector = null; | ||
| if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); | ||
| } | ||
| MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
| return new Int8HnswIndexOptions(m, efConstruction, confidenceInterval, rescoreVector); | ||
|
|
@@ -1356,7 +1358,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti | |
| } | ||
| RescoreVector rescoreVector = null; | ||
| if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); | ||
| } | ||
| MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
| return new Int4HnswIndexOptions(m, efConstruction, confidenceInterval, rescoreVector); | ||
|
|
@@ -1399,7 +1401,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti | |
| } | ||
| RescoreVector rescoreVector = null; | ||
| if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); | ||
| } | ||
| MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
| return new Int8FlatIndexOptions(confidenceInterval, rescoreVector); | ||
|
|
@@ -1425,7 +1427,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti | |
| } | ||
| RescoreVector rescoreVector = null; | ||
| if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); | ||
| } | ||
| MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
| return new Int4FlatIndexOptions(confidenceInterval, rescoreVector); | ||
|
|
@@ -1456,7 +1458,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti | |
| int efConstruction = XContentMapValues.nodeIntegerValue(efConstructionNode); | ||
| RescoreVector rescoreVector = null; | ||
| if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); | ||
| } | ||
| MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
| return new BBQHnswIndexOptions(m, efConstruction, rescoreVector); | ||
|
|
@@ -1477,7 +1479,7 @@ public boolean supportsDimension(int dims) { | |
| public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOptionsMap, IndexVersion indexVersion) { | ||
| RescoreVector rescoreVector = null; | ||
| if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); | ||
| rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); | ||
| } | ||
| MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); | ||
| return new BBQFlatIndexOptions(rescoreVector); | ||
|
|
@@ -1991,7 +1993,7 @@ record RescoreVector(float oversample) implements ToXContentObject { | |
| static final String NAME = "rescore_vector"; | ||
| static final String OVERSAMPLE = "oversample"; | ||
|
|
||
| static RescoreVector fromIndexOptions(Map<String, ?> indexOptionsMap) { | ||
| static RescoreVector fromIndexOptions(Map<String, ?> indexOptionsMap, IndexVersion indexVersion) { | ||
| Object rescoreVectorNode = indexOptionsMap.remove(NAME); | ||
| if (rescoreVectorNode == null) { | ||
| return null; | ||
|
|
@@ -2001,16 +2003,17 @@ static RescoreVector fromIndexOptions(Map<String, ?> indexOptionsMap) { | |
| if (oversampleNode == null) { | ||
| throw new IllegalArgumentException("Invalid rescore_vector value. Missing required field " + OVERSAMPLE); | ||
| } | ||
| return new RescoreVector((float) XContentMapValues.nodeDoubleValue(oversampleNode)); | ||
| } | ||
|
|
||
| RescoreVector { | ||
| if (oversample < 1) { | ||
| float oversampleValue = (float) XContentMapValues.nodeDoubleValue(oversampleNode); | ||
| if (oversampleValue == 0 && indexVersion.before(RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS)) { | ||
| throw new IllegalArgumentException("oversample must be greater than 1"); | ||
| } | ||
| if (oversampleValue < 1) { | ||
|
||
| throw new IllegalArgumentException("oversample must be greater than 1"); | ||
| } | ||
| if (oversample > 10) { | ||
| if (oversampleValue > 10) { | ||
| throw new IllegalArgumentException("oversample must be less than or equal to 10"); | ||
| } | ||
| return new RescoreVector(oversampleValue); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -2177,7 +2180,7 @@ public Query createKnnQuery( | |
| } | ||
|
|
||
| private boolean needsRescore(Float rescoreOversample) { | ||
| return rescoreOversample != null && isQuantized(); | ||
| return rescoreOversample != null && rescoreOversample > 0 && isQuantized(); | ||
| } | ||
|
|
||
| private boolean isQuantized() { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -492,6 +492,22 @@ public void testRescoreOversampleModifiesNumCandidates() { | |
| checkRescoreQueryParameters(fieldType, 1000, 1000, 11.0F, OVERSAMPLE_LIMIT, OVERSAMPLE_LIMIT, 1000); | ||
| } | ||
|
|
||
| public void testRescoreOversampleZeroBypassesRescore() { | ||
| DenseVectorFieldType fieldType = new DenseVectorFieldType( | ||
| "f", | ||
| IndexVersion.current(), | ||
| FLOAT, | ||
| 3, | ||
| true, | ||
| VectorSimilarity.COSINE, | ||
| randomIndexOptionsHnswQuantized(), | ||
| Collections.emptyMap() | ||
| ); | ||
|
|
||
| Query query = fieldType.createKnnQuery(VectorData.fromFloats(new float[] { 1, 4, 10 }), 10, 100, 0f, null, null, null); | ||
| assertTrue(query instanceof ESKnnFloatVectorQuery); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we need to check that the query parameters do not include rescoring when 0 is used in the query. Also, we should probably check that we can't set 0 for previous index versions. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ++ I will add a test. |
||
| } | ||
|
|
||
| private static void checkRescoreQueryParameters( | ||
| DenseVectorFieldType fieldType, | ||
| int k, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.