-
Notifications
You must be signed in to change notification settings - Fork 25.5k
Add more dense_vector details for cluster stats field stats #113607
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a3f1c8f
cc92c18
33ea5d1
958aaf9
060596c
663fddb
57dbbb1
6924351
14c3b10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 113607 | ||
summary: Add more `dense_vector` details for cluster stats field stats | ||
area: Search | ||
type: enhancement | ||
issues: [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,19 +9,23 @@ | |
|
||
package org.elasticsearch.action.admin.cluster.stats; | ||
|
||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.common.io.stream.StreamOutput; | ||
import org.elasticsearch.xcontent.XContentBuilder; | ||
|
||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
/** | ||
* Holds enhanced stats about a dense vector mapped field. | ||
*/ | ||
public final class DenseVectorFieldStats extends FieldStats { | ||
static final int UNSET = -1; | ||
|
||
static final String NOT_INDEXED = "not_indexed"; | ||
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type | ||
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity | ||
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type | ||
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster | ||
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster | ||
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster | ||
|
@@ -31,28 +35,36 @@ public final class DenseVectorFieldStats extends FieldStats { | |
indexedVectorCount = 0; | ||
indexedVectorDimMin = UNSET; | ||
indexedVectorDimMax = UNSET; | ||
} | ||
|
||
DenseVectorFieldStats(StreamInput in) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I noticed that this isn't actually used anywhere. I am going to double-check that this is OK. But the underlying […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 to remove |
||
super(in); | ||
indexedVectorCount = in.readVInt(); | ||
indexedVectorDimMin = in.readVInt(); | ||
indexedVectorDimMax = in.readVInt(); | ||
vectorIndexTypeCount = new HashMap<>(); | ||
vectorSimilarityTypeCount = new HashMap<>(); | ||
vectorElementTypeCount = new HashMap<>(); | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException { | ||
super.writeTo(out); | ||
out.writeVInt(indexedVectorCount); | ||
out.writeVInt(indexedVectorDimMin); | ||
out.writeVInt(indexedVectorDimMax); | ||
assert false : "writeTo should not be called on DenseVectorFieldStats"; | ||
} | ||
|
||
@Override | ||
protected void doXContent(XContentBuilder builder, Params params) throws IOException { | ||
builder.field("indexed_vector_count", indexedVectorCount); | ||
builder.field("indexed_vector_dim_min", indexedVectorDimMin); | ||
builder.field("indexed_vector_dim_max", indexedVectorDimMax); | ||
if (vectorIndexTypeCount.isEmpty() == false) { | ||
builder.startObject("vector_index_type_count"); | ||
builder.mapContents(vectorIndexTypeCount); | ||
builder.endObject(); | ||
} | ||
if (vectorSimilarityTypeCount.isEmpty() == false) { | ||
builder.startObject("vector_similarity_type_count"); | ||
builder.mapContents(vectorSimilarityTypeCount); | ||
builder.endObject(); | ||
} | ||
if (vectorElementTypeCount.isEmpty() == false) { | ||
builder.startObject("vector_element_type_count"); | ||
builder.mapContents(vectorElementTypeCount); | ||
builder.endObject(); | ||
} | ||
} | ||
|
||
@Override | ||
|
@@ -69,11 +81,53 @@ public boolean equals(Object o) { | |
DenseVectorFieldStats that = (DenseVectorFieldStats) o; | ||
return indexedVectorCount == that.indexedVectorCount | ||
&& indexedVectorDimMin == that.indexedVectorDimMin | ||
&& indexedVectorDimMax == that.indexedVectorDimMax; | ||
&& indexedVectorDimMax == that.indexedVectorDimMax | ||
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount) | ||
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount) | ||
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax); | ||
return Objects.hash( | ||
super.hashCode(), | ||
indexedVectorCount, | ||
indexedVectorDimMin, | ||
indexedVectorDimMax, | ||
vectorIndexTypeCount, | ||
vectorSimilarityTypeCount, | ||
vectorElementTypeCount | ||
); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "DenseVectorFieldStats{" | ||
+ "vectorIndexTypeCount=" | ||
+ vectorIndexTypeCount | ||
+ ", vectorSimilarityTypeCount=" | ||
+ vectorSimilarityTypeCount | ||
+ ", vectorElementTypeCount=" | ||
+ vectorElementTypeCount | ||
+ ", indexedVectorCount=" | ||
+ indexedVectorCount | ||
+ ", indexedVectorDimMin=" | ||
+ indexedVectorDimMin | ||
+ ", indexedVectorDimMax=" | ||
+ indexedVectorDimMax | ||
+ ", scriptCount=" | ||
+ scriptCount | ||
+ ", scriptLangs=" | ||
+ scriptLangs | ||
+ ", fieldScriptStats=" | ||
+ fieldScriptStats | ||
+ ", name='" | ||
+ name | ||
+ '\'' | ||
+ ", count=" | ||
+ count | ||
+ ", indexCount=" | ||
+ indexCount | ||
+ '}'; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,9 +86,30 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) { | |
FieldStats stats; | ||
if (type.equals("dense_vector")) { | ||
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new); | ||
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false; | ||
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats; | ||
if (fieldMapping.containsKey("similarity")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought […] |
||
Object similarity = fieldMapping.get("similarity"); | ||
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count); | ||
} | ||
String elementTypeStr = "float"; | ||
if (fieldMapping.containsKey("element_type")) { | ||
Object elementType = fieldMapping.get("element_type"); | ||
elementTypeStr = elementType.toString(); | ||
} | ||
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count); | ||
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index"); | ||
if (indexed) { | ||
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats; | ||
Object indexOptions = fieldMapping.get("index_options"); | ||
// NOTE: although the default for `float` is now `int8_hnsw`, that default is written explicitly into the mapping; | ||
// if the value is truly missing and the field is indexed, we default to hnsw. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Under what condition could it happen that the value for […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @mayya-sharipova when we default to […] |
||
String indexTypeStr = "hnsw"; | ||
if (indexOptions instanceof Map<?, ?> indexOptionsMap) { | ||
Object indexType = indexOptionsMap.get("type"); | ||
if (indexType != null) { | ||
indexTypeStr = indexType.toString(); | ||
} | ||
} | ||
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count); | ||
vStats.indexedVectorCount += count; | ||
Object obj = fieldMapping.get("dims"); | ||
if (obj != null) { | ||
|
@@ -100,6 +121,8 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) { | |
vStats.indexedVectorDimMax = dims; | ||
} | ||
} | ||
} else { | ||
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1); | ||
} | ||
} else { | ||
stats = fieldTypes.computeIfAbsent(type, FieldStats::new); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we be concerned about these becoming huge (and hence OOMs & co)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tteofili I wouldn't think so. We have a static number of element types, index types, and similarity types. Since we are counting each separately, we won't have a combinatorial explosion. Each of these will be a hash map of fewer than 10 or so elements.