-
Notifications
You must be signed in to change notification settings - Fork 25.6k
[ML] Add Telemetry for models without adaptive allocations #129161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
398a37e
fee6f83
d288c58
7dee3fe
aad532c
8362f48
7015d66
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pr: 129161 | ||
| summary: Add Telemetry for models without adaptive allocations | ||
| area: Machine Learning | ||
| type: enhancement | ||
| issues: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -272,6 +272,25 @@ private void registerMasterNodeMetrics(MeterRegistry meterRegistry) { | |
| () -> new LongWithAttributes(trainedModelAllocationCounts.trainedModelsFailedAllocations, isMasterMap) | ||
| ) | ||
| ); | ||
| metrics.add( | ||
| meterRegistry.registerLongGauge( | ||
| "es.ml.trained_models.deployment.fixed_allocations.current", | ||
| "Sum of current trained model allocations that do not use adaptive allocations (either enabled or disabled)", | ||
| "allocations", | ||
| () -> new LongWithAttributes(trainedModelAllocationCounts.trainedModelsFixedAllocations, isMasterMap) | ||
| ) | ||
| ); | ||
| /* | ||
| * AdaptiveAllocationsScalerService tracks the number of allocations with adaptive allocations enabled. | ||
| */ | ||
| metrics.add( | ||
| meterRegistry.registerLongGauge( | ||
| "es.ml.trained_models.deployment.disabled_adaptive_allocations.current", | ||
| "Sum of current trained model allocations that have adaptive allocations disabled", | ||
| "allocations", | ||
| () -> new LongWithAttributes(trainedModelAllocationCounts.trainedModelsDisabledAdaptiveAllocations, isMasterMap) | ||
| ) | ||
| ); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -484,17 +503,28 @@ static TrainedModelAllocationCounts findTrainedModelAllocationCounts(TrainedMode | |
| int trainedModelsTargetAllocations = 0; | ||
| int trainedModelsCurrentAllocations = 0; | ||
| int trainedModelsFailedAllocations = 0; | ||
| int trainedModelsFixedAllocations = 0; | ||
| int trainedModelsDisabledAdaptiveAllocations = 0; | ||
|
|
||
| for (TrainedModelAssignment trainedModelAssignment : metadata.allAssignments().values()) { | ||
| trainedModelsTargetAllocations += trainedModelAssignment.totalTargetAllocations(); | ||
| trainedModelsCurrentAllocations += trainedModelAssignment.totalCurrentAllocations(); | ||
| trainedModelsFailedAllocations += trainedModelAssignment.totalFailedAllocations(); | ||
|
|
||
| trainedModelsCurrentAllocations += trainedModelAssignment.totalCurrentAllocations(); | ||
| if (trainedModelAssignment.getAdaptiveAllocationsSettings() == null) { | ||
| trainedModelsFixedAllocations += trainedModelAssignment.totalCurrentAllocations(); | ||
|
||
| } else if ((trainedModelAssignment.getAdaptiveAllocationsSettings().getEnabled() == null) | ||
| || (trainedModelAssignment.getAdaptiveAllocationsSettings().getEnabled() == false)) { | ||
| trainedModelsDisabledAdaptiveAllocations += trainedModelAssignment.totalCurrentAllocations(); | ||
| } | ||
| } | ||
|
|
||
| return new TrainedModelAllocationCounts( | ||
| trainedModelsTargetAllocations, | ||
| trainedModelsCurrentAllocations, | ||
| trainedModelsFailedAllocations | ||
| trainedModelsFailedAllocations, | ||
| trainedModelsFixedAllocations, | ||
| trainedModelsDisabledAdaptiveAllocations | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -556,8 +586,10 @@ record MlTaskStatusCounts( | |
| record TrainedModelAllocationCounts( | ||
| int trainedModelsTargetAllocations, | ||
| int trainedModelsCurrentAllocations, | ||
| int trainedModelsFailedAllocations | ||
| int trainedModelsFailedAllocations, | ||
| int trainedModelsFixedAllocations, | ||
| int trainedModelsDisabledAdaptiveAllocations | ||
| ) { | ||
| static final TrainedModelAllocationCounts EMPTY = new TrainedModelAllocationCounts(0, 0, 0); | ||
| static final TrainedModelAllocationCounts EMPTY = new TrainedModelAllocationCounts(0, 0, 0, 0, 0); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can the project type be added to the attribute map? If there are different rules for different project types, it would be useful to split the data that way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so? It looks like it comes from `serverless.project_type`, which isn't available here. We could move this metric to serverless, or we can use ES|QL magic to pull in the project type from other metrics via the project id. It's possible this will get automatically added when running in serverless.