Skip to content
5 changes: 5 additions & 0 deletions docs/changelog/113607.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113607
summary: Add more `dense_vector` details for cluster stats field stats
area: Search
type: enhancement
issues: []
9 changes: 9 additions & 0 deletions docs/reference/cluster/stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
`indexed_vector_dim_max`::
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.

`vector_index_type_count`::
(object) For dense_vector field types, the number of vector types by index type in selected nodes. Vectors that are not indexed are counted under `not_indexed`.

`vector_similarity_type_count`::
(object) For dense_vector field types, the number of vector types by similarity type in selected nodes.

`vector_element_type_count`::
(object) For dense_vector field types, the number of vector types by element type in selected nodes.

`script_count`::
(integer) Number of fields that declare a script.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
setup:
- requires:
cluster_features: [ "gte_v8.4.0" ]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
- skip:
features: headers

Expand All @@ -15,11 +18,16 @@ setup:
dims: 768
index: true
similarity: l2_norm
element_type: byte
index_options:
type: hnsw
vector2:
type: dense_vector
dims: 1024
index: true
similarity: dot_product
index_options:
type: int8_hnsw
vector3:
type: dense_vector
dims: 100
Expand All @@ -41,9 +49,24 @@ setup:

---
"Field mapping stats":
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
- match: { indices.mappings.field_types.0.count: 4 }
- match: { indices.mappings.field_types.0.index_count: 2 }
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
---
"Field mapping stats with field details":
- requires:
cluster_features: ["gte_v8.4.0"]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
test_runner_features: [ capabilities ]
capabilities:
- method: GET
path: /_cluster/stats
capabilities:
- "verbose-dense-vector-mapping-stats"
reason: "Capability required to run test"
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
Expand All @@ -52,3 +75,10 @@ setup:
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
- match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
- match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
- match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
- match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
- match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,23 @@

package org.elasticsearch.action.admin.cluster.stats;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
* Holds enhanced stats about a dense vector mapped field.
*/
public final class DenseVectorFieldStats extends FieldStats {
static final int UNSET = -1;

static final String NOT_INDEXED = "not_indexed";
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
Comment on lines +26 to +28
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we be concerned about these becoming huge (and hence OOMs & co)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tteofili I wouldn't think so. We have a static number of element types, index types, and similarity types. Since we are counting each separately, we won't have a combinatorial explosion. Each of these will be a hashmap of fewer than 10 elements or so.

int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
Expand All @@ -31,28 +35,36 @@ public final class DenseVectorFieldStats extends FieldStats {
indexedVectorCount = 0;
indexedVectorDimMin = UNSET;
indexedVectorDimMax = UNSET;
}

DenseVectorFieldStats(StreamInput in) throws IOException {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that this isn't actually used anywhere. I am going to double check that this is ok. But the underlying fieldstats serialization isn't via named writables & consequently the DenseVectorFieldStats is never actually written or read.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to remove

super(in);
indexedVectorCount = in.readVInt();
indexedVectorDimMin = in.readVInt();
indexedVectorDimMax = in.readVInt();
vectorIndexTypeCount = new HashMap<>();
vectorSimilarityTypeCount = new HashMap<>();
vectorElementTypeCount = new HashMap<>();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeVInt(indexedVectorCount);
out.writeVInt(indexedVectorDimMin);
out.writeVInt(indexedVectorDimMax);
assert false : "writeTo should not be called on DenseVectorFieldStats";
}

/**
 * Emits the dense-vector specific statistics: the three indexed-vector scalar fields first,
 * followed by one object per type-count map (index type, similarity type, element type).
 * A map that has no entries is left out of the output entirely.
 */
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
    builder.field("indexed_vector_count", indexedVectorCount);
    builder.field("indexed_vector_dim_min", indexedVectorDimMin);
    builder.field("indexed_vector_dim_max", indexedVectorDimMax);
    writeTypeCounts(builder, "vector_index_type_count", vectorIndexTypeCount);
    writeTypeCounts(builder, "vector_similarity_type_count", vectorSimilarityTypeCount);
    writeTypeCounts(builder, "vector_element_type_count", vectorElementTypeCount);
}

/**
 * Writes {@code counts} as an object named {@code fieldName}; writes nothing when the map is empty.
 */
private static void writeTypeCounts(XContentBuilder builder, String fieldName, Map<String, Integer> counts) throws IOException {
    if (counts.isEmpty()) {
        return;
    }
    builder.startObject(fieldName);
    builder.mapContents(counts);
    builder.endObject();
}

@Override
Expand All @@ -69,11 +81,53 @@ public boolean equals(Object o) {
DenseVectorFieldStats that = (DenseVectorFieldStats) o;
return indexedVectorCount == that.indexedVectorCount
&& indexedVectorDimMin == that.indexedVectorDimMin
&& indexedVectorDimMax == that.indexedVectorDimMax;
&& indexedVectorDimMax == that.indexedVectorDimMax
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax);
return Objects.hash(
super.hashCode(),
indexedVectorCount,
indexedVectorDimMin,
indexedVectorDimMax,
vectorIndexTypeCount,
vectorSimilarityTypeCount,
vectorElementTypeCount
);
}

/**
 * Builds a single-line debug representation: the three type-count maps, the indexed-vector
 * counters, the script-related fields, and finally the name and count fields.
 */
@Override
public String toString() {
    // Fields specific to dense vectors.
    final String vectorPart = "vectorIndexTypeCount=" + vectorIndexTypeCount
        + ", vectorSimilarityTypeCount=" + vectorSimilarityTypeCount
        + ", vectorElementTypeCount=" + vectorElementTypeCount
        + ", indexedVectorCount=" + indexedVectorCount
        + ", indexedVectorDimMin=" + indexedVectorDimMin
        + ", indexedVectorDimMax=" + indexedVectorDimMax;

    // Script-related fields.
    final String scriptPart = ", scriptCount=" + scriptCount
        + ", scriptLangs=" + scriptLangs
        + ", fieldScriptStats=" + fieldScriptStats;

    // Name and usage counters; the name is wrapped in single quotes.
    final String basePart = ", name='" + name + '\''
        + ", count=" + count
        + ", indexCount=" + indexCount;

    return "DenseVectorFieldStats{" + vectorPart + scriptPart + basePart + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,30 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) {
FieldStats stats;
if (type.equals("dense_vector")) {
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
if (fieldMapping.containsKey("similarity")) {
Copy link
Contributor

@mayya-sharipova mayya-sharipova Sep 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought similarity can only be provided when indexed=true? Should we put this under indexed (line 101)?

Object similarity = fieldMapping.get("similarity");
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
}
String elementTypeStr = "float";
if (fieldMapping.containsKey("element_type")) {
Object elementType = fieldMapping.get("element_type");
elementTypeStr = elementType.toString();
}
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
if (indexed) {
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
Object indexOptions = fieldMapping.get("index_options");
// NOTE: although the default for `float` is now `int8_hnsw`, that default is actually added to the mapping.
// If the value is truly missing and we are indexed, we default to hnsw.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under what conditions could the value for index_options.type be missing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mayya-sharipova When we default to index: true but the element type is byte. In that scenario we won't provide a value to the mapping, and it's instead hnsw.

String indexTypeStr = "hnsw";
if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
Object indexType = indexOptionsMap.get("type");
if (indexType != null) {
indexTypeStr = indexType.toString();
}
}
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
vStats.indexedVectorCount += count;
Object obj = fieldMapping.get("dims");
if (obj != null) {
Expand All @@ -100,6 +121,8 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) {
vStats.indexedVectorDimMax = dims;
}
}
} else {
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
}
} else {
stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
@ServerlessScope(Scope.INTERNAL)
public class RestClusterStatsAction extends BaseRestHandler {

private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size");
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
"human-readable-total-docs-size",
"verbose-dense-vector-mapping-stats"
);
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats"));
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,16 @@ public void testToXContent() {
"index_count" : 2,
"indexed_vector_count" : 2,
"indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100
"indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 2
},
"vector_similarity_type_count" : {
"dot_product" : 2
},
"vector_element_type_count" : {
"float" : 2
}
},
{
"name" : "keyword",
Expand Down Expand Up @@ -234,7 +243,16 @@ public void testToXContentWithSomeSharedMappings() {
"index_count" : 3,
"indexed_vector_count" : 3,
"indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100
"indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 3
},
"vector_similarity_type_count" : {
"dot_product" : 3
},
"vector_element_type_count" : {
"float" : 3
}
},
{
"name" : "keyword",
Expand Down Expand Up @@ -460,6 +478,11 @@ public void testDenseVectorType() {
expectedStats.indexedVectorCount = 2 * indicesCount;
expectedStats.indexedVectorDimMin = 768;
expectedStats.indexedVectorDimMax = 1024;
expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
}

Expand Down