Skip to content

Commit 398a37e

Browse files
committed
[ML] Add Telemetry for models without adaptive allocations
Added min and max allocations as attributes to the telemetry for trained models with adaptive allocations enabled. Added telemetry for models with adaptive allocations disabled or never set.
1 parent b5d5229 commit 398a37e

File tree

4 files changed

+81
-7
lines changed

4 files changed

+81
-7
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlMetrics.java

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,25 @@ private void registerMasterNodeMetrics(MeterRegistry meterRegistry) {
272272
() -> new LongWithAttributes(trainedModelAllocationCounts.trainedModelsFailedAllocations, isMasterMap)
273273
)
274274
);
275+
metrics.add(
276+
meterRegistry.registerLongGauge(
277+
"es.ml.trained_models.deployment.fixed_allocations.current",
278+
"Sum of current trained model allocations that do not use adaptive allocations (either enabled or disabled)",
279+
"allocations",
280+
() -> new LongWithAttributes(trainedModelAllocationCounts.trainedModelsFixedAllocations, isMasterMap)
281+
)
282+
);
283+
/*
284+
* Allocations with adaptive allocations enabled are reported by AdaptiveAllocationsScalerService, so the gauge below covers only allocations that have adaptive allocations disabled.
285+
*/
286+
metrics.add(
287+
meterRegistry.registerLongGauge(
288+
"es.ml.trained_models.deployment.disabled_adaptive_allocations.current",
289+
"Sum of current trained model allocations that have adaptive allocations disabled",
290+
"allocations",
291+
() -> new LongWithAttributes(trainedModelAllocationCounts.trainedModelsDisabledAdaptiveAllocations, isMasterMap)
292+
)
293+
);
275294
}
276295

277296
@Override
@@ -484,17 +503,28 @@ static TrainedModelAllocationCounts findTrainedModelAllocationCounts(TrainedMode
484503
int trainedModelsTargetAllocations = 0;
485504
int trainedModelsCurrentAllocations = 0;
486505
int trainedModelsFailedAllocations = 0;
506+
int trainedModelsFixedAllocations = 0;
507+
int trainedModelsDisabledAdaptiveAllocations = 0;
487508

488509
for (TrainedModelAssignment trainedModelAssignment : metadata.allAssignments().values()) {
489510
trainedModelsTargetAllocations += trainedModelAssignment.totalTargetAllocations();
490-
trainedModelsCurrentAllocations += trainedModelAssignment.totalCurrentAllocations();
491511
trainedModelsFailedAllocations += trainedModelAssignment.totalFailedAllocations();
512+
513+
trainedModelsCurrentAllocations += trainedModelAssignment.totalCurrentAllocations();
514+
if (trainedModelAssignment.getAdaptiveAllocationsSettings() == null) {
515+
trainedModelsFixedAllocations += trainedModelAssignment.totalCurrentAllocations();
516+
} else if ((trainedModelAssignment.getAdaptiveAllocationsSettings().getEnabled() == null)
517+
|| (trainedModelAssignment.getAdaptiveAllocationsSettings().getEnabled() == false)) {
518+
trainedModelsDisabledAdaptiveAllocations += trainedModelAssignment.totalCurrentAllocations();
519+
}
492520
}
493521

494522
return new TrainedModelAllocationCounts(
495523
trainedModelsTargetAllocations,
496524
trainedModelsCurrentAllocations,
497-
trainedModelsFailedAllocations
525+
trainedModelsFailedAllocations,
526+
trainedModelsFixedAllocations,
527+
trainedModelsDisabledAdaptiveAllocations
498528
);
499529
}
500530

@@ -556,8 +586,10 @@ record MlTaskStatusCounts(
556586
/**
 * Allocation totals summed over every trained model assignment by
 * {@code findTrainedModelAllocationCounts}.
 *
 * @param trainedModelsTargetAllocations sum of {@code totalTargetAllocations()} over all assignments
 * @param trainedModelsCurrentAllocations sum of {@code totalCurrentAllocations()} over all assignments
 * @param trainedModelsFailedAllocations sum of {@code totalFailedAllocations()} over all assignments
 * @param trainedModelsFixedAllocations current allocations of assignments whose adaptive allocations
 *                                      settings are {@code null}
 * @param trainedModelsDisabledAdaptiveAllocations current allocations of assignments that have adaptive
 *                                      allocations settings whose {@code getEnabled()} is {@code null}
 *                                      or {@code false}
 */
record TrainedModelAllocationCounts(
557587
int trainedModelsTargetAllocations,
558588
int trainedModelsCurrentAllocations,
559-
int trainedModelsFailedAllocations
589+
int trainedModelsFailedAllocations,
590+
int trainedModelsFixedAllocations,
591+
int trainedModelsDisabledAdaptiveAllocations
560592
) {
561-
static final TrainedModelAllocationCounts EMPTY = new TrainedModelAllocationCounts(0, 0, 0);
593+
static final TrainedModelAllocationCounts EMPTY = new TrainedModelAllocationCounts(0, 0, 0, 0, 0);
562594
}
563595
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,12 @@ public Double getLastMeasuredInferenceTime() {
239239
public Long getLastMeasuredQueueSize() {
240240
return lastMeasuredQueueSize;
241241
}
242+
243+
/**
 * Returns this scaler's {@code minNumberOfAllocations} value.
 *
 * @return the minimum number of allocations; the boxed return type allows {@code null}, and
 *         {@code observeAllocationCount} treats a {@code null} result as 0
 *         (presumably {@code null} means no minimum was set; confirm against the field declaration)
 */
public Integer getMinNumberOfAllocations() {
244+
return minNumberOfAllocations;
245+
}
246+
247+
/**
 * Returns this scaler's {@code maxNumberOfAllocations} value.
 *
 * @return the maximum number of allocations; the boxed return type allows {@code null}, and
 *         {@code observeAllocationCount} omits the attribute when the result is {@code null}
 *         (presumably {@code null} means no maximum was set; confirm against the field declaration)
 */
public Integer getMaxNumberOfAllocations() {
248+
return maxNumberOfAllocations;
249+
}
242250
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
import java.util.ArrayList;
4242
import java.util.Collection;
43+
import java.util.Collections;
4344
import java.util.HashMap;
4445
import java.util.HashSet;
4546
import java.util.List;
@@ -105,7 +106,7 @@ void init() {
105106
"es.ml.trained_models.adaptive_allocations.actual_number_of_allocations.current",
106107
"the actual number of allocations",
107108
"",
108-
() -> observeLong(AdaptiveAllocationsScaler::getNumberOfAllocations)
109+
this::observeAllocationCount
109110
)
110111
);
111112
metrics.add(
@@ -179,6 +180,22 @@ Collection<DoubleWithAttributes> observeDouble(Function<AdaptiveAllocationsScale
179180
}
180181
return observations;
181182
}
183+
184+
/**
 * Builds one observation per entry in {@code scalers} for the
 * "actual number of allocations" gauge.
 * <p>
 * Each observation's value is the scaler's number of allocations. Its attributes carry the
 * deployment id, the minimum number of allocations (0 when the scaler reports {@code null}) and,
 * only when non-null, the maximum number of allocations.
 *
 * @return the observations, one per scaler
 */
Collection<LongWithAttributes> observeAllocationCount() {
185+
return scalers.values().stream().map(scaler -> {
186+
var value = scaler.getNumberOfAllocations();
187+
var min = scaler.getMinNumberOfAllocations();
188+
var max = scaler.getMaxNumberOfAllocations();
189+
190+
// At most three attributes are stored: deployment id, min and (optionally) max.
var attributes = new HashMap<String, Object>(3);
191+
attributes.put("deployment_id", scaler.getDeploymentId());
192+
// A null minimum is reported as 0, so this attribute is always present.
attributes.put("min_number_of_allocations", min != null ? min : 0);
193+
// A null maximum is left out of the attributes instead of being replaced by a default.
if (max != null) {
194+
attributes.put("max_number_of_allocations", max);
195+
}
196+
// Wrap the map so the attributes handed to the observation cannot be modified through it.
return new LongWithAttributes(value, Collections.unmodifiableMap(attributes));
197+
}).toList();
198+
}
182199
}
183200

184201
/**

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlMetricsTests.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedState;
2222
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
2323
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
24+
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
2425
import org.elasticsearch.xpack.core.ml.inference.assignment.RoutingInfo;
2526
import org.elasticsearch.xpack.core.ml.inference.assignment.RoutingState;
2627
import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
@@ -146,11 +147,27 @@ public void testFindTrainedModelAllocationCounts() {
146147
TrainedModelAssignment.Builder.empty(mock(StartTrainedModelDeploymentAction.TaskParams.class), null)
147148
.addRoutingEntry("node2", new RoutingInfo(0, 1, RoutingState.STARTING, ""))
148149
);
150+
metadataBuilder.addNewAssignment(
151+
"model4",
152+
TrainedModelAssignment.Builder.empty(
153+
mock(StartTrainedModelDeploymentAction.TaskParams.class),
154+
new AdaptiveAllocationsSettings(true, 0, 1)
155+
).addRoutingEntry("node1", new RoutingInfo(0, 0, RoutingState.STARTING, ""))
156+
);
157+
metadataBuilder.addNewAssignment(
158+
"model5",
159+
TrainedModelAssignment.Builder.empty(
160+
mock(StartTrainedModelDeploymentAction.TaskParams.class),
161+
new AdaptiveAllocationsSettings(false, 1, 1)
162+
).addRoutingEntry("node1", new RoutingInfo(1, 1, RoutingState.STARTING, ""))
163+
);
149164

150165
MlMetrics.TrainedModelAllocationCounts counts = MlMetrics.findTrainedModelAllocationCounts(metadataBuilder.build());
151-
assertThat(counts.trainedModelsTargetAllocations(), is(5));
152-
assertThat(counts.trainedModelsCurrentAllocations(), is(3));
166+
assertThat(counts.trainedModelsTargetAllocations(), is(6));
167+
assertThat(counts.trainedModelsCurrentAllocations(), is(4));
153168
assertThat(counts.trainedModelsFailedAllocations(), is(1));
169+
assertThat(counts.trainedModelsFixedAllocations(), is(3));
170+
assertThat(counts.trainedModelsDisabledAdaptiveAllocations(), is(1));
154171
}
155172

156173
public void testFindNativeMemoryFree() {

0 commit comments

Comments
 (0)