Skip to content

Commit 94863f3

Browse files
authored
[ML] fix bug where initial scale from 0->1 could scale too high (#84244) (#84248)
A bug introduced in #72423 could cause initial scaling from 0->1 to be much too high for the given tier. This only occurs if the autoscaling capacity API was called numerous times in a row while there were no machine learning nodes, but autoscaling was enabled.
1 parent b98036c commit 94863f3

File tree

4 files changed

+26
-16
lines changed

4 files changed

+26
-16
lines changed

docs/changelog/84244.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 84244
2+
summary: Fix bug where initial scale from 0->1 could scale too high
3+
area: Machine Learning
4+
type: bug
5+
issues: []

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -702,14 +702,14 @@ AutoscalingDeciderResult scaleUpFromZero(
702702
// If we still have calculated zero, this means the ml memory tracker does not have the required info.
703703
// So, request a scale for the default. This is only for the 0 -> N scaling case.
704704
if (updatedCapacity.getNodeMlNativeMemoryRequirement() == 0L) {
705-
updatedCapacity.merge(
705+
updatedCapacity = updatedCapacity.merge(
706706
new NativeMemoryCapacity(
707707
ByteSizeValue.ofMb(AnalysisLimits.DEFAULT_MODEL_MEMORY_LIMIT_MB).getBytes(),
708708
ByteSizeValue.ofMb(AnalysisLimits.DEFAULT_MODEL_MEMORY_LIMIT_MB).getBytes()
709709
)
710710
);
711711
}
712-
updatedCapacity.merge(
712+
updatedCapacity = updatedCapacity.merge(
713713
new NativeMemoryCapacity(
714714
MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes(),
715715
MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes()

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ static NativeMemoryCapacity from(NativeMemoryCapacity capacity) {
2626
return new NativeMemoryCapacity(capacity.tierMlNativeMemoryRequirement, capacity.nodeMlNativeMemoryRequirement, capacity.jvmSize);
2727
}
2828

29-
private long tierMlNativeMemoryRequirement;
30-
private long nodeMlNativeMemoryRequirement;
31-
private Long jvmSize;
29+
private final long tierMlNativeMemoryRequirement;
30+
private final long nodeMlNativeMemoryRequirement;
31+
private final Long jvmSize;
3232

3333
public NativeMemoryCapacity(long tierMlNativeMemoryRequirement, long nodeMlNativeMemoryRequirement, Long jvmSize) {
3434
this.tierMlNativeMemoryRequirement = tierMlNativeMemoryRequirement;
@@ -39,17 +39,22 @@ public NativeMemoryCapacity(long tierMlNativeMemoryRequirement, long nodeMlNativ
3939
NativeMemoryCapacity(long tierMlNativeMemoryRequirement, long nodeMlNativeMemoryRequirement) {
4040
this.tierMlNativeMemoryRequirement = tierMlNativeMemoryRequirement;
4141
this.nodeMlNativeMemoryRequirement = nodeMlNativeMemoryRequirement;
42+
this.jvmSize = null;
4243
}
4344

44-
NativeMemoryCapacity merge(NativeMemoryCapacity nativeMemoryCapacity) {
45-
this.tierMlNativeMemoryRequirement += nativeMemoryCapacity.tierMlNativeMemoryRequirement;
46-
if (nativeMemoryCapacity.nodeMlNativeMemoryRequirement > this.nodeMlNativeMemoryRequirement) {
47-
this.nodeMlNativeMemoryRequirement = nativeMemoryCapacity.nodeMlNativeMemoryRequirement;
48-
// If the new node size is bigger, we have no way of knowing if the JVM size would stay the same
49-
// So null out
50-
this.jvmSize = null;
51-
}
52-
return this;
45+
/**
46+
* Merges the passed capacity with the current one. A new instance is created and returned
47+
* @param nativeMemoryCapacity the capacity to merge with
48+
* @return a new instance with the merged capacity values
49+
*/
50+
NativeMemoryCapacity merge(final NativeMemoryCapacity nativeMemoryCapacity) {
51+
if (this == nativeMemoryCapacity) return this;
52+
long tier = this.tierMlNativeMemoryRequirement + nativeMemoryCapacity.tierMlNativeMemoryRequirement;
53+
long node = Math.max(nativeMemoryCapacity.nodeMlNativeMemoryRequirement, this.nodeMlNativeMemoryRequirement);
54+
// If the new node size is bigger, we have no way of knowing if the JVM size would stay the same
55+
// So null out
56+
Long jvmSize = nativeMemoryCapacity.nodeMlNativeMemoryRequirement > this.nodeMlNativeMemoryRequirement ? null : this.jvmSize;
57+
return new NativeMemoryCapacity(tier, node, jvmSize);
5358
}
5459

5560
public AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) {

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@ public void testMerge() {
3030
ByteSizeValue.ofMb(200).getBytes(),
3131
ByteSizeValue.ofMb(50).getBytes()
3232
);
33-
capacity.merge(new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofMb(100).getBytes()));
33+
capacity = capacity.merge(new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofMb(100).getBytes()));
3434
assertThat(capacity.getTierMlNativeMemoryRequirement(), equalTo(ByteSizeValue.ofGb(1).getBytes() * 2L));
3535
assertThat(capacity.getNodeMlNativeMemoryRequirement(), equalTo(ByteSizeValue.ofMb(200).getBytes()));
3636
assertThat(capacity.getJvmSize(), equalTo(ByteSizeValue.ofMb(50).getBytes()));
3737

38-
capacity.merge(new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofMb(300).getBytes()));
38+
capacity = capacity.merge(new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofMb(300).getBytes()));
3939

4040
assertThat(capacity.getTierMlNativeMemoryRequirement(), equalTo(ByteSizeValue.ofGb(1).getBytes() * 3L));
4141
assertThat(capacity.getNodeMlNativeMemoryRequirement(), equalTo(ByteSizeValue.ofMb(300).getBytes()));

0 commit comments

Comments
 (0)