Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/113607.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113607
summary: Add more `dense_vector` details for cluster stats field stats
area: Search
type: enhancement
issues: []
9 changes: 9 additions & 0 deletions docs/reference/cluster/stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
`indexed_vector_dim_max`::
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.

`vector_index_type_count`::
(object) For dense_vector field types, the number of vector types by index type in selected nodes. Vectors that are not indexed are counted under `not_indexed`.

`vector_similarity_type_count`::
(object) For dense_vector field types, the number of vector types by similarity type in selected nodes.

`vector_element_type_count`::
(object) For dense_vector field types, the number of vector types by element type in selected nodes.

`script_count`::
(integer) Number of fields that declare a script.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
setup:
- requires:
cluster_features: [ "gte_v8.4.0" ]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
- skip:
features: headers

Expand All @@ -15,11 +18,16 @@ setup:
dims: 768
index: true
similarity: l2_norm
element_type: byte
index_options:
type: hnsw
vector2:
type: dense_vector
dims: 1024
index: true
similarity: dot_product
index_options:
type: int8_hnsw
vector3:
type: dense_vector
dims: 100
Expand All @@ -41,9 +49,24 @@ setup:

---
"Field mapping stats":
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
- match: { indices.mappings.field_types.0.count: 4 }
- match: { indices.mappings.field_types.0.index_count: 2 }
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
---
"Field mapping stats with field details":
- requires:
cluster_features: ["gte_v8.4.0"]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
test_runner_features: [ capabilities ]
capabilities:
- method: GET
path: /_cluster/stats
capabilities:
- "verbose-dense-vector-mapping-stats"
reason: "Capability required to run test"
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
Expand All @@ -52,3 +75,10 @@ setup:
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
- match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
- match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
- match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
- match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
- match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,23 @@

package org.elasticsearch.action.admin.cluster.stats;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
* Holds enhanced stats about a dense vector mapped field.
*/
public final class DenseVectorFieldStats extends FieldStats {
static final int UNSET = -1;

static final String NOT_INDEXED = "not_indexed";
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
Expand All @@ -31,28 +35,36 @@ public final class DenseVectorFieldStats extends FieldStats {
indexedVectorCount = 0;
indexedVectorDimMin = UNSET;
indexedVectorDimMax = UNSET;
}

DenseVectorFieldStats(StreamInput in) throws IOException {
super(in);
indexedVectorCount = in.readVInt();
indexedVectorDimMin = in.readVInt();
indexedVectorDimMax = in.readVInt();
vectorIndexTypeCount = new HashMap<>();
vectorSimilarityTypeCount = new HashMap<>();
vectorElementTypeCount = new HashMap<>();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeVInt(indexedVectorCount);
out.writeVInt(indexedVectorDimMin);
out.writeVInt(indexedVectorDimMax);
assert false : "writeTo should not be called on DenseVectorFieldStats";
}

@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.field("indexed_vector_count", indexedVectorCount);
builder.field("indexed_vector_dim_min", indexedVectorDimMin);
builder.field("indexed_vector_dim_max", indexedVectorDimMax);
if (vectorIndexTypeCount.isEmpty() == false) {
builder.startObject("vector_index_type_count");
builder.mapContents(vectorIndexTypeCount);
builder.endObject();
}
if (vectorSimilarityTypeCount.isEmpty() == false) {
builder.startObject("vector_similarity_type_count");
builder.mapContents(vectorSimilarityTypeCount);
builder.endObject();
}
if (vectorElementTypeCount.isEmpty() == false) {
builder.startObject("vector_element_type_count");
builder.mapContents(vectorElementTypeCount);
builder.endObject();
}
}

@Override
Expand All @@ -69,11 +81,53 @@ public boolean equals(Object o) {
DenseVectorFieldStats that = (DenseVectorFieldStats) o;
return indexedVectorCount == that.indexedVectorCount
&& indexedVectorDimMin == that.indexedVectorDimMin
&& indexedVectorDimMax == that.indexedVectorDimMax;
&& indexedVectorDimMax == that.indexedVectorDimMax
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax);
return Objects.hash(
super.hashCode(),
indexedVectorCount,
indexedVectorDimMin,
indexedVectorDimMax,
vectorIndexTypeCount,
vectorSimilarityTypeCount,
vectorElementTypeCount
);
}

/**
 * Builds a debug representation of this stats object.
 * <p>
 * The output lists the three per-type count maps (index type, similarity, element type),
 * the indexed-vector counters, and then {@code scriptCount}, {@code scriptLangs},
 * {@code fieldScriptStats}, {@code name}, {@code count} and {@code indexCount}.
 * NOTE(review): those last six fields are not declared in the visible part of this class;
 * presumably they are inherited from {@code FieldStats} -- confirm.
 *
 * @return a string of the form {@code DenseVectorFieldStats{key=value, ...}}, with the name wrapped in single quotes
 */
@Override
public String toString() {
return "DenseVectorFieldStats{"
+ "vectorIndexTypeCount="
+ vectorIndexTypeCount
+ ", vectorSimilarityTypeCount="
+ vectorSimilarityTypeCount
+ ", vectorElementTypeCount="
+ vectorElementTypeCount
+ ", indexedVectorCount="
+ indexedVectorCount
+ ", indexedVectorDimMin="
+ indexedVectorDimMin
+ ", indexedVectorDimMax="
+ indexedVectorDimMax
+ ", scriptCount="
+ scriptCount
+ ", scriptLangs="
+ scriptLangs
+ ", fieldScriptStats="
+ fieldScriptStats
+ ", name='"
+ name
+ '\''
+ ", count="
+ count
+ ", indexCount="
+ indexCount
+ '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,30 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) {
FieldStats stats;
if (type.equals("dense_vector")) {
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
if (fieldMapping.containsKey("similarity")) {
Object similarity = fieldMapping.get("similarity");
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
}
String elementTypeStr = "float";
if (fieldMapping.containsKey("element_type")) {
Object elementType = fieldMapping.get("element_type");
elementTypeStr = elementType.toString();
}
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
if (indexed) {
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
Object indexOptions = fieldMapping.get("index_options");
// NOTE: although the default for `float` is now `int8_hnsw`, that default is explicitly added to the mapping.
// So if the value is truly missing and the field is indexed, we default to `hnsw`.
String indexTypeStr = "hnsw";
if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
Object indexType = indexOptionsMap.get("type");
if (indexType != null) {
indexTypeStr = indexType.toString();
}
}
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
vStats.indexedVectorCount += count;
Object obj = fieldMapping.get("dims");
if (obj != null) {
Expand All @@ -100,6 +121,8 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) {
vStats.indexedVectorDimMax = dims;
}
}
} else {
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
}
} else {
stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
@ServerlessScope(Scope.INTERNAL)
public class RestClusterStatsAction extends BaseRestHandler {

private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size");
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
"human-readable-total-docs-size",
"verbose-dense-vector-mapping-stats"
);
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Set.copyOf(Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats")));
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");
private static final Set<String> SUPPORTED_QUERY_PARAMETERS = Set.of("include_remotes", "nodeId", REST_TIMEOUT_PARAM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,16 @@ public void testToXContent() {
"index_count" : 2,
"indexed_vector_count" : 2,
"indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100
"indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 2
},
"vector_similarity_type_count" : {
"dot_product" : 2
},
"vector_element_type_count" : {
"float" : 2
}
},
{
"name" : "keyword",
Expand Down Expand Up @@ -234,7 +243,16 @@ public void testToXContentWithSomeSharedMappings() {
"index_count" : 3,
"indexed_vector_count" : 3,
"indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100
"indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 3
},
"vector_similarity_type_count" : {
"dot_product" : 3
},
"vector_element_type_count" : {
"float" : 3
}
},
{
"name" : "keyword",
Expand Down Expand Up @@ -460,6 +478,11 @@ public void testDenseVectorType() {
expectedStats.indexedVectorCount = 2 * indicesCount;
expectedStats.indexedVectorDimMin = 768;
expectedStats.indexedVectorDimMax = 1024;
expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
}

Expand Down