Skip to content
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Shared fixture for the nested kNN tests in this file. The whole file is
# gated on the cluster advertising the "mapper.ivf_nested_support" feature.
setup:
- requires:
cluster_features: "mapper.ivf_nested_support"
reason: 'ivf nested support required'
# Create a single-shard index `test` with a top-level keyword `name` and a
# `nested` field whose children carry a keyword `paragraph_id` and a 5-dim
# dense_vector indexed with l2_norm similarity and the bbq_ivf index type.
- do:
indices.create:
index: test
body:
settings:
index:
number_of_shards: 1
mappings:
properties:
name:
type: keyword
nested:
type: nested
properties:
paragraph_id:
type: keyword
vector:
type: dense_vector
dims: 5
index: true
similarity: l2_norm
index_options:
type: bbq_ivf

# Filtered alias: `my_alias` is restricted to documents whose name is "rabbit.jpg".
aliases:
my_alias:
filter:
term:
name: "rabbit.jpg"

# Doc 1 (cow.jpg): two nested paragraphs (ids 0 and 1).
- do:
index:
index: test
id: "1"
body:
name: cow.jpg
nested:
- paragraph_id: 0
vector: [230, 300.33, -34.8988, 15.555, -200]
- paragraph_id: 1
vector: [240, 300, -3, 1, -20]

# Doc 2 (moose.jpg): two nested paragraphs (ids 0 and 2).
- do:
index:
index: test
id: "2"
body:
name: moose.jpg
nested:
- paragraph_id: 0
vector: [-0.5, 100, -13, 14.8, -156]
- paragraph_id: 2
vector: [0, 100, 0, 14.8, -156]

# Doc 3 (rabbit.jpg): a single nested paragraph; the only doc visible through `my_alias`.
- do:
index:
index: test
id: "3"
body:
name: rabbit.jpg
nested:
- paragraph_id: 0
vector: [0.5, 111.3, -13, 14.8, -156]
# Force-merge to one segment and refresh before the tests search the index.
- do:
indices.forcemerge:
index: test
max_num_segments: 1

- do:
indices.refresh: {}

---
# Runs the same nested kNN query twice against `test`: once plain, once with
# inner_hits. Both runs expect all three parent documents to be returned.
"nested kNN search that returns diverse parents docs":
# First search: no inner_hits; only the total and the top two parents are checked.
- do:
search:
index: test
body:
fields: [ "name" ]
query:
nested:
path: nested
query:
knn:
field: nested.vector
query_vector: [-0.5, 90, -10, 14.8, -156]
num_candidates: 3
- match: {hits.total.value: 3}

- match: {hits.hits.0._id: "2"}
- match: {hits.hits.0.fields.name.0: "moose.jpg"}

- match: {hits.hits.1._id: "3"}
- match: {hits.hits.1.fields.name.0: "rabbit.jpg"}

# Second search: same query, plus inner_hits limited to one nested hit per
# parent, returning only `nested.paragraph_id` (no _source).
- do:
search:
index: test
body:
fields: [ "name" ]
query:
nested:
path: nested
query:
knn:
field: nested.vector
query_vector: [ -0.5, 90, -10, 14.8, -156 ]
num_candidates: 3
inner_hits: { size: 1, "fields": [ "nested.paragraph_id" ], _source: false }

- match: {hits.total.value: 3}

# paragraph_id is mapped as keyword, so the integer 0 that was indexed is
# matched here as the string "0".
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0.fields.name.0: "moose.jpg" }
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0" }

- match: { hits.hits.1._id: "3" }
- match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
- match: { hits.hits.1.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0" }

- match: { hits.hits.2._id: "1" }
- match: { hits.hits.2.fields.name.0: "cow.jpg" }
- match: { hits.hits.2.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0" }

---
# Searches through the filtered alias `my_alias` with num_candidates: 1 and
# expects exactly one hit: doc 3 (rabbit.jpg) with its paragraph "0".
"nested kNN search pre-filtered on alias with filter on top level fields":
- do:
search:
index: my_alias # filter on name: "rabbit.jpg"
body:
fields: [ "name" ]
query:
nested:
path: nested
query:
knn:
field: nested.vector
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
num_candidates: 1
inner_hits: { size: 1, "fields": [ "nested.paragraph_id" ], _source: false }

- match: {hits.total.value: 1} # as alias is passed as pre-filter, we get a single result
- match: {hits.hits.0._id: "3"}
- match: {hits.hits.0.fields.name.0: "rabbit.jpg"}
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0" }

---
# Combines a top-level term clause on `name` with the nested kNN query inside
# bool.must, and checks how num_candidates changes the result.
"nested kNN search post-filtered on top level fields":
# num_candidates: 1 -- the test expects zero hits for this combination.
- do:
search:
index: test
body:
fields: [ "name" ]
query:
bool:
must:
- term:
name: "rabbit.jpg"
- nested:
path: nested
query:
knn:
field: nested.vector
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
num_candidates: 1
- match: { hits.total.value: 0 } # no hits because returned single vector did not pass post-filter

# num_candidates: 3 -- the same bool query is now expected to return doc 3
# (rabbit.jpg); inner_hits is added to check the matching paragraph.
- do:
search:
index: test
body:
fields: [ "name" ]
query:
bool:
must:
- term:
name: "rabbit.jpg"
- nested:
path: nested
query:
knn:
field: nested.vector
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
num_candidates: 3
inner_hits: { size: 1, fields: [ "nested.paragraph_id" ], _source: false }

- match: {hits.total.value: 1}
- match: {hits.hits.0._id: "3"}
- match: {hits.hits.0.fields.name.0: "rabbit.jpg"}
- match: { hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0" }
Original file line number Diff line number Diff line change
Expand Up @@ -198,47 +198,6 @@ setup:
index_options:
type: bbq_ivf
---
"Test few dimensions fail indexing":
- do:
catch: bad_request
indices.create:
index: bad_bbq_ivf
body:
mappings:
properties:
vector:
type: dense_vector
dims: 42
index: true
index_options:
type: bbq_ivf

- do:
indices.create:
index: dynamic_dim_bbq_ivf
body:
mappings:
properties:
vector:
type: dense_vector
index: true
similarity: l2_norm
index_options:
type: bbq_ivf

- do:
catch: bad_request
index:
index: dynamic_dim_bbq_ivf
body:
vector: [1.0, 2.0, 3.0, 4.0, 5.0]

- do:
index:
index: dynamic_dim_bbq_ivf
body:
vector: [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]
---
"Test index configured rescore vector":
- skip:
features: "headers"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.search.AbstractKnnCollector;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
Expand Down Expand Up @@ -252,7 +253,10 @@ public final void search(String field, float[] target, KnnCollector knnCollector
}
return visitedDocs.getAndSet(docId) == false;
};
assert knnCollector instanceof AbstractKnnCollector;
AbstractKnnCollector knnCollectorImpl = (AbstractKnnCollector) knnCollector;
int nProbe = DYNAMIC_NPROBE;
// Search strategy may be null if this is being called from checkIndex (e.g. from a test)
if (knnCollector.getSearchStrategy() instanceof IVFKnnSearchStrategy ivfSearchStrategy) {
nProbe = ivfSearchStrategy.getNProbe();
}
Expand Down Expand Up @@ -280,7 +284,10 @@ public final void search(String field, float[] target, KnnCollector knnCollector
long expectedDocs = 0;
long actualDocs = 0;
// initially we visit only the "centroids to search"
while (centroidQueue.size() > 0 && centroidsVisited < nProbe && actualDocs < knnCollector.k()) {
// Note, numCollected is doing the bare minimum here.
// TODO do we need to handle nested doc counts similarly to how we handle
// filtering? E.g. keep exploring until we hit an expected number of parent documents vs. child vectors?
while (centroidQueue.size() > 0 && (centroidsVisited < nProbe || knnCollectorImpl.numCollected() < knnCollector.k())) {
++centroidsVisited;
// TODO: do we actually need to know the score?
int centroidOrdinal = centroidQueue.pop();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class MapperFeatures implements FeatureSpecification {
);
static final NodeFeature NPE_ON_DIMS_UPDATE_FIX = new NodeFeature("mapper.npe_on_dims_update_fix");
static final NodeFeature IVF_FORMAT_CLUSTER_FEATURE = new NodeFeature("mapper.ivf_format_cluster_feature");
static final NodeFeature IVF_NESTED_SUPPORT = new NodeFeature("mapper.ivf_nested_support");

@Override
public Set<NodeFeature> getTestFeatures() {
Expand Down Expand Up @@ -70,7 +71,8 @@ public Set<NodeFeature> getTestFeatures() {
NPE_ON_DIMS_UPDATE_FIX,
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ,
IVF_FORMAT_CLUSTER_FEATURE
IVF_FORMAT_CLUSTER_FEATURE,
IVF_NESTED_SUPPORT
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.MappingParser;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
Expand All @@ -77,6 +76,7 @@
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.search.vectors.DenseVectorQuery;
import org.elasticsearch.search.vectors.DiversifyingChildrenIVFKnnFloatVectorQuery;
import org.elasticsearch.search.vectors.ESDiversifyingChildrenByteKnnVectorQuery;
import org.elasticsearch.search.vectors.ESDiversifyingChildrenFloatKnnVectorQuery;
import org.elasticsearch.search.vectors.ESKnnByteVectorQuery;
Expand Down Expand Up @@ -1650,7 +1650,7 @@ public boolean supportsElementType(ElementType elementType) {

@Override
public boolean supportsDimension(int dims) {
return dims >= BBQ_MIN_DIMS;
return true;
}
};

Expand Down Expand Up @@ -2521,12 +2521,19 @@ && isNotUnitVector(squaredMagnitude)) {
adjustedK = Math.min((int) Math.ceil(k * oversample), OVERSAMPLE_LIMIT);
numCands = Math.max(adjustedK, numCands);
}
if (parentFilter != null && indexOptions instanceof BBQIVFIndexOptions) {
throw new IllegalArgumentException("IVF index does not support nested queries");
}
Query knnQuery;
if (indexOptions instanceof BBQIVFIndexOptions bbqIndexOptions) {
knnQuery = new IVFKnnFloatVectorQuery(name(), queryVector, adjustedK, numCands, filter, bbqIndexOptions.defaultNProbe);
knnQuery = parentFilter != null
? new DiversifyingChildrenIVFKnnFloatVectorQuery(
name(),
queryVector,
adjustedK,
numCands,
filter,
parentFilter,
bbqIndexOptions.defaultNProbe
)
: new IVFKnnFloatVectorQuery(name(), queryVector, adjustedK, numCands, filter, bbqIndexOptions.defaultNProbe);
} else {
knnQuery = parentFilter != null
? new ESDiversifyingChildrenFloatKnnVectorQuery(
Expand Down Expand Up @@ -2769,19 +2776,6 @@ public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion).init(this);
}

@Override
public void doValidate(MappingLookup mappers) {
if (indexOptions instanceof BBQIVFIndexOptions && mappers.nestedLookup().getNestedParent(fullPath()) != null) {
throw new IllegalArgumentException(
"["
+ CONTENT_TYPE
+ "] fields with index type ["
+ indexOptions.type
+ "] cannot be indexed if they're within [nested] mappings"
);
}
}

private static IndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) {
@SuppressWarnings("unchecked")
Map<String, ?> indexOptionsMap = (Map<String, ?>) propNode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ abstract TopDocs approximateSearch(
) throws IOException;

protected KnnCollectorManager getKnnCollectorManager(int k, IndexSearcher searcher) {
return new IVFCollectorManager(k, nProbe);
return new IVFCollectorManager(k);
}

@Override
Expand All @@ -195,16 +195,14 @@ protected boolean match(int doc) {

static class IVFCollectorManager implements KnnCollectorManager {
private final int k;
private final int nprobe;

IVFCollectorManager(int k, int nprobe) {
IVFCollectorManager(int k) {
this.k = k;
this.nprobe = nprobe;
}

@Override
public KnnCollector newCollector(int visitedLimit, KnnSearchStrategy searchStrategy, LeafReaderContext context) throws IOException {
return new TopKnnCollector(k, visitedLimit, new IVFKnnSearchStrategy(nprobe));
return new TopKnnCollector(k, visitedLimit, searchStrategy);
}
}
}
Loading