Skip to content

Commit 6f9d68d

Browse files
Metrics for indexing failures due to version conflicts (#119067)
This exposes new OTel node-based and index-based metrics for indexing failures due to version conflicts. In addition, the /_cat/shards, /_cat/indices and /_cat/nodes APIs expose the same metric under the newly added column `iifvc`.

Relates: #107601

(cherry picked from commit 12eb1cf)

# Conflicts:
#	server/src/main/java/org/elasticsearch/TransportVersions.java
1 parent 7f3ab2c commit 6f9d68d

File tree

23 files changed

+578
-65
lines changed

23 files changed

+578
-65
lines changed

docs/changelog/119067.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 119067
2+
summary: Metrics for indexing failures due to version conflicts
3+
area: CRUD
4+
type: feature
5+
issues: []

docs/reference/cat/nodes.asciidoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ Number of indexing operations, such as `1`.
239239
`indexing.index_failed`, `iif`, `indexingIndexFailed`::
240240
Number of failed indexing operations, such as `0`.
241241

242+
`indexing.index_failed_due_to_version_conflict`, `iifvc`, `indexingIndexFailedDueToVersionConflict`::
243+
Number of indexing operations that failed due to a version conflict, such as `0`.
244+
242245
`merges.current`, `mc`, `mergesCurrent`::
243246
Number of current merge operations, such as `0`.
244247

docs/reference/cat/shards.asciidoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ Number of indexing operations, such as `1`.
159159
`indexing.index_failed`, `iif`, `indexingIndexFailed`::
160160
Number of failed indexing operations, such as `0`.
161161

162+
`indexing.index_failed_due_to_version_conflict`, `iifvc`, `indexingIndexFailedDueToVersionConflict`::
163+
Number of indexing operations that failed due to a version conflict, such as `0`.
164+
162165
`merges.current`, `mc`, `mergesCurrent`::
163166
Number of current merge operations, such as `0`.
164167

modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ private static ShardStats getShardStats(IndexMetadata indexMeta, int shardIndex,
493493
CommonStats stats = new CommonStats();
494494
stats.docs = new DocsStats(100, 0, randomByteSizeValue().getBytes());
495495
stats.store = new StoreStats();
496-
stats.indexing = new IndexingStats(new IndexingStats.Stats(1, 1, 1, 1, 1, 1, 1, 1, false, 1, targetWriteLoad, 1));
496+
stats.indexing = new IndexingStats(new IndexingStats.Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, false, 1, targetWriteLoad, 1));
497497
return new ShardStats(shardRouting, new ShardPath(false, path, path, shardId), stats, null, null, null, false, 0);
498498
}
499499

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
indexing.index_time .+ \n
4747
indexing.index_total .+ \n
4848
indexing.index_failed .+ \n
49+
indexing.index_failed_due_to_version_conflict .+ \n
4950
merges.current .+ \n
5051
merges.current_docs .+ \n
5152
merges.current_size .+ \n

server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import java.io.IOException;
3232
import java.util.Collection;
33+
import java.util.HashMap;
3334
import java.util.List;
3435
import java.util.Map;
3536

@@ -76,6 +77,7 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
7677
static final String STANDARD_INDEXING_COUNT = "es.indices.standard.indexing.total";
7778
static final String STANDARD_INDEXING_TIME = "es.indices.standard.indexing.time";
7879
static final String STANDARD_INDEXING_FAILURE = "es.indices.standard.indexing.failure.total";
80+
static final String STANDARD_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT = "es.indices.standard.indexing.failure.version_conflict.total";
7981

8082
static final String TIME_SERIES_INDEX_COUNT = "es.indices.time_series.total";
8183
static final String TIME_SERIES_BYTES_SIZE = "es.indices.time_series.size";
@@ -89,6 +91,8 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
8991
static final String TIME_SERIES_INDEXING_COUNT = "es.indices.time_series.indexing.total";
9092
static final String TIME_SERIES_INDEXING_TIME = "es.indices.time_series.indexing.time";
9193
static final String TIME_SERIES_INDEXING_FAILURE = "es.indices.time_series.indexing.failure.total";
94+
static final String TIME_SERIES_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT =
95+
"es.indices.time_series.indexing.failure.version_conflict.total";
9296

9397
static final String LOGSDB_INDEX_COUNT = "es.indices.logsdb.total";
9498
static final String LOGSDB_BYTES_SIZE = "es.indices.logsdb.size";
@@ -102,6 +106,7 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
102106
static final String LOGSDB_INDEXING_COUNT = "es.indices.logsdb.indexing.total";
103107
static final String LOGSDB_INDEXING_TIME = "es.indices.logsdb.indexing.time";
104108
static final String LOGSDB_INDEXING_FAILURE = "es.indices.logsdb.indexing.failure.total";
109+
static final String LOGSDB_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT = "es.indices.logsdb.indexing.failure.version_conflict.total";
105110

106111
public void testIndicesMetrics() {
107112
String indexNode = internalCluster().startNode();
@@ -132,7 +137,9 @@ public void testIndicesMetrics() {
132137
STANDARD_INDEXING_TIME,
133138
greaterThanOrEqualTo(0L),
134139
STANDARD_INDEXING_FAILURE,
135-
equalTo(indexing1.getIndexFailedCount() - indexing0.getIndexCount())
140+
equalTo(indexing1.getIndexFailedCount() - indexing0.getIndexFailedCount()),
141+
STANDARD_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT,
142+
equalTo(indexing1.getIndexFailedDueToVersionConflictCount() - indexing0.getIndexFailedDueToVersionConflictCount())
136143
)
137144
);
138145

@@ -155,7 +162,9 @@ public void testIndicesMetrics() {
155162
TIME_SERIES_INDEXING_TIME,
156163
greaterThanOrEqualTo(0L),
157164
TIME_SERIES_INDEXING_FAILURE,
158-
equalTo(indexing2.getIndexFailedCount() - indexing1.getIndexFailedCount())
165+
equalTo(indexing1.getIndexFailedCount() - indexing0.getIndexFailedCount()),
166+
TIME_SERIES_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT,
167+
equalTo(indexing1.getIndexFailedDueToVersionConflictCount() - indexing0.getIndexFailedDueToVersionConflictCount())
159168
)
160169
);
161170

@@ -177,36 +186,50 @@ public void testIndicesMetrics() {
177186
LOGSDB_INDEXING_TIME,
178187
greaterThanOrEqualTo(0L),
179188
LOGSDB_INDEXING_FAILURE,
180-
equalTo(indexing3.getIndexFailedCount() - indexing2.getIndexFailedCount())
189+
equalTo(indexing3.getIndexFailedCount() - indexing2.getIndexFailedCount()),
190+
LOGSDB_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT,
191+
equalTo(indexing3.getIndexFailedDueToVersionConflictCount() - indexing2.getIndexFailedDueToVersionConflictCount())
181192
)
182193
);
183194
// already collected indexing stats
184-
collectThenAssertMetrics(
185-
telemetry,
186-
4,
195+
Map<String, Matcher<Long>> zeroMatchers = new HashMap<>();
196+
zeroMatchers.putAll(
187197
Map.of(
188198
STANDARD_INDEXING_COUNT,
189199
equalTo(0L),
190200
STANDARD_INDEXING_TIME,
191201
equalTo(0L),
192202
STANDARD_INDEXING_FAILURE,
193203
equalTo(0L),
194-
204+
STANDARD_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT,
205+
equalTo(0L)
206+
)
207+
);
208+
zeroMatchers.putAll(
209+
Map.of(
195210
TIME_SERIES_INDEXING_COUNT,
196211
equalTo(0L),
197212
TIME_SERIES_INDEXING_TIME,
198213
equalTo(0L),
199214
TIME_SERIES_INDEXING_FAILURE,
200215
equalTo(0L),
201-
216+
TIME_SERIES_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT,
217+
equalTo(0L)
218+
)
219+
);
220+
zeroMatchers.putAll(
221+
Map.of(
202222
LOGSDB_INDEXING_COUNT,
203223
equalTo(0L),
204224
LOGSDB_INDEXING_TIME,
205225
equalTo(0L),
206226
LOGSDB_INDEXING_FAILURE,
227+
equalTo(0L),
228+
LOGSDB_INDEXING_FAILURE_DUE_TO_VERSION_CONFLICT,
207229
equalTo(0L)
208230
)
209231
);
232+
collectThenAssertMetrics(telemetry, 4, zeroMatchers);
210233
String searchNode = internalCluster().startDataOnlyNode();
211234
indicesService = internalCluster().getInstance(IndicesService.class, searchNode);
212235
telemetry = internalCluster().getInstance(PluginsService.class, searchNode)

0 commit comments

Comments
 (0)