Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2aa5ef7
Add usage stats for semantic_text fields
dimitris-athanasiou Sep 15, 2025
7f4ca01
Update docs/changelog/135262.yaml
dimitris-athanasiou Sep 23, 2025
abfcf80
Fix changelog
dimitris-athanasiou Sep 23, 2025
00fb4b2
Prepare SemanticTextStatsTests for BWC testing
dimitris-athanasiou Sep 24, 2025
2f05e03
Add YAML test
dimitris-athanasiou Sep 24, 2025
fb71c57
Fix YAML test
dimitris-athanasiou Sep 24, 2025
421e77e
Revert "Fix YAML test"
dimitris-athanasiou Sep 24, 2025
79e1d95
Strip linux suffix from model_id for default stats
dimitris-athanasiou Sep 24, 2025
1922534
Correct linux suffix this time
dimitris-athanasiou Sep 24, 2025
e286108
Changelog area is Vector Search
dimitris-athanasiou Sep 25, 2025
70fe35b
Address some review points
dimitris-athanasiou Sep 25, 2025
6e76c14
Address evil edge case
dimitris-athanasiou Sep 25, 2025
8d2796d
Do not omit zero values
dimitris-athanasiou Sep 25, 2025
4d2c2fd
[CI] Auto commit changes from spotless
Sep 25, 2025
3656419
Also exclude hidden indices
dimitris-athanasiou Sep 25, 2025
3d4f273
Address more review comments
dimitris-athanasiou Sep 29, 2025
a9f7d79
Merge branch 'main' into usage-for-semantic-text
dimitris-athanasiou Sep 29, 2025
d9dc8b8
Merge branch 'main' into usage-for-semantic-text
dimitris-athanasiou Sep 29, 2025
6f3b519
Merge branch 'main' into usage-for-semantic-text
dimitris-athanasiou Sep 29, 2025
809631b
Only add semantic_text stats if task_type is compatible
dimitris-athanasiou Sep 29, 2025
632b7cb
Contain task type compatibility in TransportInferenceUsageAction
dimitris-athanasiou Sep 29, 2025
c2e6dd3
Merge branch 'main' into usage-for-semantic-text
dimitris-athanasiou Sep 29, 2025
4408342
Merge branch 'main' into usage-for-semantic-text
dimitris-athanasiou Sep 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/135262.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 135262
summary: Add usage stats for `semantic_text` fields
area: "Vector Search"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
9182000
2 changes: 1 addition & 1 deletion server/src/main/resources/transport/upper_bounds/9.2.csv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
index_reshard_shardcount_small,9181000
inference_telemetry_added_semantic_text_stats,9182000
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@

package org.elasticsearch.xpack.core.inference.usage;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
Expand All @@ -19,28 +22,34 @@

public class ModelStats implements ToXContentObject, Writeable {

public static final NodeFeature SEMANTIC_TEXT_USAGE = new NodeFeature("inference.semantic_text_usage");

static final TransportVersion INFERENCE_TELEMETRY_ADDED_SEMANTIC_TEXT_STATS = TransportVersion.fromName(
"inference_telemetry_added_semantic_text_stats"
);

private final String service;
private final TaskType taskType;
private long count;
@Nullable
private final SemanticTextStats semanticTextStats;

public ModelStats(String service, TaskType taskType) {
this(service, taskType, 0L);
}

public ModelStats(String service, TaskType taskType, long count) {
public ModelStats(String service, TaskType taskType, long count, @Nullable SemanticTextStats semanticTextStats) {
this.service = service;
this.taskType = taskType;
this.count = count;
}

public ModelStats(ModelStats stats) {
this(stats.service, stats.taskType, stats.count);
this.semanticTextStats = semanticTextStats;
}

public ModelStats(StreamInput in) throws IOException {
this.service = in.readString();
this.taskType = in.readEnum(TaskType.class);
this.count = in.readLong();
if (in.getTransportVersion().supports(INFERENCE_TELEMETRY_ADDED_SEMANTIC_TEXT_STATS)) {
this.semanticTextStats = in.readOptional(SemanticTextStats::new);
} else {
this.semanticTextStats = null;
}
}

public void add() {
Expand All @@ -59,6 +68,11 @@ public long count() {
return count;
}

/**
 * @return the semantic text stats attached to these model stats, or {@code null} if there are none
 */
@Nullable
public SemanticTextStats semanticTextStats() {
    return this.semanticTextStats;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand All @@ -71,25 +85,34 @@ public void addXContentFragment(XContentBuilder builder, Params params) throws I
builder.field("service", service);
builder.field("task_type", taskType.name());
builder.field("count", count);
if (semanticTextStats != null) {
builder.field("semantic_text", semanticTextStats);
}
}

/**
 * Serializes these stats to the given stream: service, task type and count, followed by the
 * optional semantic text stats when the destination's transport version supports
 * {@code INFERENCE_TELEMETRY_ADDED_SEMANTIC_TEXT_STATS}.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
    out.writeString(service);
    out.writeEnum(taskType);
    out.writeLong(count);

    // Same version check as the StreamInput constructor, so both ends agree on the wire layout.
    final boolean canSendSemanticTextStats = out.getTransportVersion().supports(INFERENCE_TELEMETRY_ADDED_SEMANTIC_TEXT_STATS);
    if (canSendSemanticTextStats == false) {
        return;
    }
    out.writeOptionalWriteable(semanticTextStats);
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ModelStats that = (ModelStats) o;
return count == that.count && Objects.equals(service, that.service) && taskType == that.taskType;
return count == that.count
&& Objects.equals(service, that.service)
&& taskType == that.taskType
&& Objects.equals(semanticTextStats, that.semanticTextStats);
}

@Override
public int hashCode() {
return Objects.hash(service, taskType, count);
return Objects.hash(service, taskType, count, semanticTextStats);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.core.inference.usage;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Objects;

/**
 * Mutable holder for three usage counters: a field count, an indices count and an inference id count.
 * <p>
 * Instances can be written to and read from a transport stream, and render themselves as an
 * XContent object with the keys {@code field_count}, {@code indices_count} and
 * {@code inference_id_count}.
 */
public class SemanticTextStats implements ToXContentObject, Writeable {

    private static final String FIELD_COUNT = "field_count";
    private static final String INDICES_COUNT = "indices_count";
    private static final String INFERENCE_ID_COUNT = "inference_id_count";

    private long fieldCount;
    private long indicesCount;
    private long inferenceIdCount;

    /** Creates stats with every counter at zero. */
    public SemanticTextStats() {
        this(0L, 0L, 0L);
    }

    /**
     * Creates stats with the given initial counter values.
     *
     * @param fieldCount       initial field count
     * @param indicesCount     initial indices count
     * @param inferenceIdCount initial inference id count
     */
    public SemanticTextStats(long fieldCount, long indicesCount, long inferenceIdCount) {
        this.fieldCount = fieldCount;
        this.indicesCount = indicesCount;
        this.inferenceIdCount = inferenceIdCount;
    }

    /**
     * Reads the three counters from the stream, in the order {@link #writeTo(StreamOutput)} writes them.
     *
     * @param in the stream to read from
     * @throws IOException if reading from the stream fails
     */
    public SemanticTextStats(StreamInput in) throws IOException {
        // Arguments are evaluated left to right, so the read order is field, indices, inference id.
        this(in.readVLong(), in.readVLong(), in.readVLong());
    }

    /**
     * Writes the field count, indices count and inference id count, in that order.
     *
     * @param out the stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // NOTE(review): writeVLong presumably expects non-negative values - confirm callers never store negatives.
        out.writeVLong(fieldCount);
        out.writeVLong(indicesCount);
        out.writeVLong(inferenceIdCount);
    }

    /**
     * Renders the three counters as a single object.
     *
     * @return the builder that was passed in
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        return builder.startObject()
            .field(FIELD_COUNT, fieldCount)
            .field(INDICES_COUNT, indicesCount)
            .field(INFERENCE_ID_COUNT, inferenceIdCount)
            .endObject();
    }

    /** Two instances are equal when they are of the same class and all three counters match. */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        final SemanticTextStats other = (SemanticTextStats) o;
        return fieldCount == other.fieldCount
            && indicesCount == other.indicesCount
            && inferenceIdCount == other.inferenceIdCount;
    }

    @Override
    public int hashCode() {
        return Objects.hash(fieldCount, indicesCount, inferenceIdCount);
    }

    /** @return the current field count */
    public long getFieldCount() {
        return this.fieldCount;
    }

    /** @return the current indices count */
    public long getIndicesCount() {
        return this.indicesCount;
    }

    /** @return the current inference id count */
    public long getInferenceIdCount() {
        return this.inferenceIdCount;
    }

    /**
     * Adds the given amount to the field count.
     *
     * @param fieldCount the amount to add
     */
    public void addFieldCount(long fieldCount) {
        this.fieldCount = this.fieldCount + fieldCount;
    }

    /** Increases the indices count by one. */
    public void incIndicesCount() {
        indicesCount += 1;
    }

    /**
     * Overwrites the inference id count.
     *
     * @param inferenceIdCount the new value
     */
    public void setInferenceIdCount(long inferenceIdCount) {
        this.inferenceIdCount = inferenceIdCount;
    }

    /** @return {@code true} only when all three counters are zero */
    public boolean isEmpty() {
        if (fieldCount != 0 || indicesCount != 0) {
            return false;
        }
        return inferenceIdCount == 0;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@

package org.elasticsearch.xpack.core.inference.usage;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.test.AbstractWireSerializingTestCase;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase;

import java.io.IOException;

import static org.hamcrest.Matchers.equalTo;

public class ModelStatsTests extends AbstractWireSerializingTestCase<ModelStats> {
public class ModelStatsTests extends AbstractBWCWireSerializationTestCase<ModelStats> {

@Override
protected Writeable.Reader<ModelStats> instanceReader() {
Expand All @@ -33,16 +34,28 @@ protected ModelStats mutateInstance(ModelStats modelStats) throws IOException {
String service = modelStats.service();
TaskType taskType = modelStats.taskType();
long count = modelStats.count();
return switch (randomInt(2)) {
case 0 -> new ModelStats(randomValueOtherThan(service, ESTestCase::randomIdentifier), taskType, count);
case 1 -> new ModelStats(service, randomValueOtherThan(taskType, () -> randomFrom(TaskType.values())), count);
case 2 -> new ModelStats(service, taskType, randomValueOtherThan(count, ESTestCase::randomLong));
SemanticTextStats semanticTextStats = modelStats.semanticTextStats();
return switch (randomInt(3)) {
case 0 -> new ModelStats(randomValueOtherThan(service, ESTestCase::randomIdentifier), taskType, count, semanticTextStats);
case 1 -> new ModelStats(
service,
randomValueOtherThan(taskType, () -> randomFrom(TaskType.values())),
count,
semanticTextStats
);
case 2 -> new ModelStats(service, taskType, randomValueOtherThan(count, ESTestCase::randomLong), semanticTextStats);
case 3 -> new ModelStats(
service,
taskType,
count,
randomValueOtherThan(semanticTextStats, SemanticTextStatsTests::createRandomInstance)
);
default -> throw new IllegalArgumentException();
};
}

public void testAdd() {
ModelStats stats = new ModelStats("test_service", randomFrom(TaskType.values()));
ModelStats stats = new ModelStats("test_service", randomFrom(TaskType.values()), 0, null);
assertThat(stats.count(), equalTo(0L));

stats.add();
Expand All @@ -56,6 +69,20 @@ public void testAdd() {
}

public static ModelStats createRandomInstance() {
return new ModelStats(randomIdentifier(), randomFrom(TaskType.values()), randomLong());
TaskType taskType = randomValueOtherThan(TaskType.ANY, () -> randomFrom(TaskType.values()));
return new ModelStats(
randomIdentifier(),
taskType,
randomLong(),
randomBoolean() ? SemanticTextStatsTests.createRandomInstance() : null
);
}

/**
 * Returns the instance as it is expected to look after a round trip at the given transport version:
 * versions that do not support {@code INFERENCE_TELEMETRY_ADDED_SEMANTIC_TEXT_STATS} drop the
 * semantic text stats, all other versions keep the instance unchanged.
 */
@Override
protected ModelStats mutateInstanceForVersion(ModelStats instance, TransportVersion version) {
    if (version.supports(ModelStats.INFERENCE_TELEMETRY_ADDED_SEMANTIC_TEXT_STATS)) {
        return instance;
    }
    // Older versions never serialize the semantic text stats, so they come back as null.
    return new ModelStats(instance.service(), instance.taskType(), instance.count(), null);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.core.inference.usage;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase;

import java.io.IOException;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;

/**
 * Wire-serialization and unit tests for {@link SemanticTextStats}.
 */
public class SemanticTextStatsTests extends AbstractBWCWireSerializationTestCase<SemanticTextStats> {

    @Override
    protected Writeable.Reader<SemanticTextStats> instanceReader() {
        return SemanticTextStats::new;
    }

    @Override
    protected SemanticTextStats createTestInstance() {
        return createRandomInstance();
    }

    /** Builds an instance whose three counters are random non-negative longs. */
    static SemanticTextStats createRandomInstance() {
        long fieldCount = randomNonNegativeLong();
        long indicesCount = randomNonNegativeLong();
        long inferenceIdCount = randomNonNegativeLong();
        return new SemanticTextStats(fieldCount, indicesCount, inferenceIdCount);
    }

    /** Returns a copy of the instance with exactly one randomly chosen counter changed. */
    @Override
    protected SemanticTextStats mutateInstance(SemanticTextStats instance) throws IOException {
        final long fieldCount = instance.getFieldCount();
        final long indicesCount = instance.getIndicesCount();
        final long inferenceIdCount = instance.getInferenceIdCount();

        final int counterToMutate = randomInt(2);
        switch (counterToMutate) {
            case 0:
                return new SemanticTextStats(
                    randomValueOtherThan(fieldCount, ESTestCase::randomNonNegativeLong),
                    indicesCount,
                    inferenceIdCount
                );
            case 1:
                return new SemanticTextStats(
                    fieldCount,
                    randomValueOtherThan(indicesCount, ESTestCase::randomNonNegativeLong),
                    inferenceIdCount
                );
            case 2:
                return new SemanticTextStats(
                    fieldCount,
                    indicesCount,
                    randomValueOtherThan(inferenceIdCount, ESTestCase::randomNonNegativeLong)
                );
            default:
                throw new IllegalArgumentException();
        }
    }

    /** The no-arg constructor starts every counter at zero. */
    public void testDefaultConstructor() {
        SemanticTextStats emptyStats = new SemanticTextStats();
        assertThat(emptyStats.getFieldCount(), equalTo(0L));
        assertThat(emptyStats.getIndicesCount(), equalTo(0L));
        assertThat(emptyStats.getInferenceIdCount(), equalTo(0L));
    }

    /** Successive calls to addFieldCount accumulate. */
    public void testAddFieldCount() {
        SemanticTextStats accumulating = new SemanticTextStats();

        accumulating.addFieldCount(10L);
        assertThat(accumulating.getFieldCount(), equalTo(10L));

        accumulating.addFieldCount(32L);
        assertThat(accumulating.getFieldCount(), equalTo(42L));
    }

    /** isEmpty is true only when all counters are zero; any single positive counter makes it false. */
    public void testIsEmpty() {
        assertThat(new SemanticTextStats().isEmpty(), is(true));

        SemanticTextStats onlyFields = new SemanticTextStats(randomLongBetween(1, Long.MAX_VALUE), 0, 0);
        assertThat(onlyFields.isEmpty(), is(false));

        SemanticTextStats onlyIndices = new SemanticTextStats(0, randomLongBetween(1, Long.MAX_VALUE), 0);
        assertThat(onlyIndices.isEmpty(), is(false));

        SemanticTextStats onlyInferenceIds = new SemanticTextStats(0, 0, randomLongBetween(1, Long.MAX_VALUE));
        assertThat(onlyInferenceIds.isEmpty(), is(false));
    }

    /** No version-specific adjustment is applied: the instance is returned unchanged for every version. */
    @Override
    protected SemanticTextStats mutateInstanceForVersion(SemanticTextStats instance, TransportVersion version) {
        return instance;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.elasticsearch.features.FeatureSpecification;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.xpack.core.inference.usage.ModelStats;
import org.elasticsearch.xpack.inference.mapper.SemanticInferenceMetadataFieldsMapper;
import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper;
import org.elasticsearch.xpack.inference.queries.InterceptedInferenceQueryBuilder;
Expand Down Expand Up @@ -94,7 +95,8 @@ public Set<NodeFeature> getTestFeatures() {
SemanticQueryBuilder.SEMANTIC_QUERY_MULTIPLE_INFERENCE_IDS,
SemanticQueryBuilder.SEMANTIC_QUERY_FILTER_FIELD_CAPS_FIX,
InterceptedInferenceQueryBuilder.NEW_SEMANTIC_QUERY_INTERCEPTORS,
TEXT_SIMILARITY_RERANKER_SNIPPETS
TEXT_SIMILARITY_RERANKER_SNIPPETS,
ModelStats.SEMANTIC_TEXT_USAGE
)
);
testFeatures.addAll(getFeatures());
Expand Down
Loading