Skip to content

Commit 4739c5f

Browse files
committed
[ML] Handle new actual_memory_usage_bytes field in model size stats.
The `actual_memory_usage_bytes` field represents the real, physical memory allocated to the `autodetect` process, as reported by the OS. Reporting this value in the model size stats associated with an anomaly detection (AD) job is useful, especially when diagnosing out-of-memory (OOM) situations.
1 parent bcdf51a commit 4739c5f

File tree

6 files changed

+49
-2
lines changed

6 files changed

+49
-2
lines changed

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ static TransportVersion def(int id) {
213213
public static final TransportVersion REMOTE_EXCEPTION = def(9_044_0_00);
214214
public static final TransportVersion ESQL_REMOVE_AGGREGATE_TYPE = def(9_045_0_00);
215215
public static final TransportVersion ADD_PROJECT_ID_TO_DSL_ERROR_INFO = def(9_046_0_00);
216+
public static final TransportVersion ML_AD_ACTUAL_MEMORY_USAGE = def(9_047_0_00);
216217

217218
/*
218219
* STOP! READ THIS FIRST! No, really,

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MlConfigVersion.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,13 @@ private static void checkUniqueness(int id, String uniqueId) {
153153
// V_11 is used in ELSER v2 package configs
154154
public static final MlConfigVersion V_11 = registerMlConfigVersion(11_00_0_0_99, "79CB2950-57C7-11EE-AE5D-0800200C9A66");
155155
public static final MlConfigVersion V_12 = registerMlConfigVersion(12_00_0_0_99, "Trained model config prefix strings added");
156+
public static final MlConfigVersion V_13 = registerMlConfigVersion(13_00_0_0_99, "Anomaly Detection reports actual memory usage");
156157

157158
/**
158159
* Reference to the most recent Ml config version.
159160
* This should be the Ml config version with the highest id.
160161
*/
161-
public static final MlConfigVersion CURRENT = V_12;
162+
public static final MlConfigVersion CURRENT = V_13;
162163

163164
/**
164165
* Reference to the first MlConfigVersion that is detached from the

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
4141
*/
4242
public static final ParseField MODEL_BYTES_FIELD = new ParseField("model_bytes");
4343
public static final ParseField PEAK_MODEL_BYTES_FIELD = new ParseField("peak_model_bytes");
44+
public static final ParseField ACTUAL_MEMORY_USAGE_BYTES = new ParseField("actual_memory_usage_bytes");
4445
public static final ParseField MODEL_BYTES_EXCEEDED_FIELD = new ParseField("model_bytes_exceeded");
4546
public static final ParseField MODEL_BYTES_MEMORY_LIMIT_FIELD = new ParseField("model_bytes_memory_limit");
4647
public static final ParseField TOTAL_BY_FIELD_COUNT_FIELD = new ParseField("total_by_field_count");
@@ -74,6 +75,7 @@ private static ConstructingObjectParser<Builder, Void> createParser(boolean igno
7475
parser.declareString((modelSizeStat, s) -> {}, Result.RESULT_TYPE);
7576
parser.declareLong(Builder::setModelBytes, MODEL_BYTES_FIELD);
7677
parser.declareLong(Builder::setPeakModelBytes, PEAK_MODEL_BYTES_FIELD);
78+
parser.declareLong(Builder::setActualMemoryUsageBytes, ACTUAL_MEMORY_USAGE_BYTES);
7779
parser.declareLong(Builder::setModelBytesExceeded, MODEL_BYTES_EXCEEDED_FIELD);
7880
parser.declareLong(Builder::setModelBytesMemoryLimit, MODEL_BYTES_MEMORY_LIMIT_FIELD);
7981
parser.declareLong(Builder::setBucketAllocationFailuresCount, BUCKET_ALLOCATION_FAILURES_COUNT_FIELD);
@@ -152,14 +154,16 @@ public String toString() {
152154
* 1. The job's model_memory_limit
153155
* 2. The current model memory, i.e. what's reported in model_bytes of this object
154156
* 3. The peak model memory, i.e. what's reported in peak_model_bytes of this object
157+
* 4. The actual memory usage, i.e. what's reported in actual_memory_usage_bytes of this object
155158
* The field storing this enum can also be <code>null</code>, which means the
156159
* assignment code will decide on the fly - this was the old behaviour prior
157160
* to 7.11.
158161
*/
159162
public enum AssignmentMemoryBasis implements Writeable {
160163
MODEL_MEMORY_LIMIT,
161164
CURRENT_MODEL_BYTES,
162-
PEAK_MODEL_BYTES;
165+
PEAK_MODEL_BYTES,
166+
ACTUAL_MEMORY_USAGE_BYTES;
163167

164168
public static AssignmentMemoryBasis fromString(String statusName) {
165169
return valueOf(statusName.trim().toUpperCase(Locale.ROOT));
@@ -183,6 +187,7 @@ public String toString() {
183187
private final String jobId;
184188
private final long modelBytes;
185189
private final Long peakModelBytes;
190+
private final Long actualMemoryUsageBytes;
186191
private final Long modelBytesExceeded;
187192
private final Long modelBytesMemoryLimit;
188193
private final long totalByFieldCount;
@@ -206,6 +211,7 @@ private ModelSizeStats(
206211
String jobId,
207212
long modelBytes,
208213
Long peakModelBytes,
214+
Long actualMemoryUsageBytes,
209215
Long modelBytesExceeded,
210216
Long modelBytesMemoryLimit,
211217
long totalByFieldCount,
@@ -228,6 +234,7 @@ private ModelSizeStats(
228234
this.jobId = jobId;
229235
this.modelBytes = modelBytes;
230236
this.peakModelBytes = peakModelBytes;
237+
this.actualMemoryUsageBytes = actualMemoryUsageBytes;
231238
this.modelBytesExceeded = modelBytesExceeded;
232239
this.modelBytesMemoryLimit = modelBytesMemoryLimit;
233240
this.totalByFieldCount = totalByFieldCount;
@@ -252,6 +259,11 @@ public ModelSizeStats(StreamInput in) throws IOException {
252259
jobId = in.readString();
253260
modelBytes = in.readVLong();
254261
peakModelBytes = in.readOptionalLong();
262+
if (in.getTransportVersion().onOrAfter(TransportVersions.ML_AD_ACTUAL_MEMORY_USAGE)) {
263+
actualMemoryUsageBytes = in.readOptionalLong();
264+
} else {
265+
actualMemoryUsageBytes = null;
266+
}
255267
modelBytesExceeded = in.readOptionalLong();
256268
modelBytesMemoryLimit = in.readOptionalLong();
257269
totalByFieldCount = in.readVLong();
@@ -293,6 +305,9 @@ public void writeTo(StreamOutput out) throws IOException {
293305
out.writeString(jobId);
294306
out.writeVLong(modelBytes);
295307
out.writeOptionalLong(peakModelBytes);
308+
if (out.getTransportVersion().onOrAfter(TransportVersions.ML_AD_ACTUAL_MEMORY_USAGE)) {
309+
out.writeOptionalLong(actualMemoryUsageBytes);
310+
}
296311
out.writeOptionalLong(modelBytesExceeded);
297312
out.writeOptionalLong(modelBytesMemoryLimit);
298313
out.writeVLong(totalByFieldCount);
@@ -339,6 +354,9 @@ public XContentBuilder doXContentBody(XContentBuilder builder) throws IOExceptio
339354
if (peakModelBytes != null) {
340355
builder.field(PEAK_MODEL_BYTES_FIELD.getPreferredName(), peakModelBytes);
341356
}
357+
if (actualMemoryUsageBytes != null) {
358+
builder.field(ACTUAL_MEMORY_USAGE_BYTES.getPreferredName(), actualMemoryUsageBytes);
359+
}
342360
if (modelBytesExceeded != null) {
343361
builder.field(MODEL_BYTES_EXCEEDED_FIELD.getPreferredName(), modelBytesExceeded);
344362
}
@@ -391,6 +409,10 @@ public Long getPeakModelBytes() {
391409
return peakModelBytes;
392410
}
393411

412+
/**
 * Returns the stored {@code actual_memory_usage_bytes} value.
 *
 * @return the actual memory usage in bytes, or {@code null} when no value is available
 *         (for example, when this object was read from a stream older than
 *         {@code TransportVersions.ML_AD_ACTUAL_MEMORY_USAGE})
 */
public Long getActualMemoryUsageBytes() {
413+
return actualMemoryUsageBytes;
414+
}
415+
394416
/**
 * Returns the stored {@code model_bytes_exceeded} value.
 *
 * @return the value, or {@code null} when it is absent (the field is optional on the wire
 *         and is only written to XContent when non-null)
 */
public Long getModelBytesExceeded() {
395417
return modelBytesExceeded;
396418
}
@@ -479,6 +501,7 @@ public int hashCode() {
479501
jobId,
480502
modelBytes,
481503
peakModelBytes,
504+
actualMemoryUsageBytes,
482505
modelBytesExceeded,
483506
modelBytesMemoryLimit,
484507
totalByFieldCount,
@@ -517,6 +540,7 @@ public boolean equals(Object other) {
517540

518541
return this.modelBytes == that.modelBytes
519542
&& Objects.equals(this.peakModelBytes, that.peakModelBytes)
543+
// actualMemoryUsageBytes is a boxed, nullable Long: compare by value with Objects.equals, not by reference with ==
&& Objects.equals(this.actualMemoryUsageBytes, that.actualMemoryUsageBytes)
520544
&& Objects.equals(this.modelBytesExceeded, that.modelBytesExceeded)
521545
&& Objects.equals(this.modelBytesMemoryLimit, that.modelBytesMemoryLimit)
522546
&& this.totalByFieldCount == that.totalByFieldCount
@@ -543,6 +567,7 @@ public static class Builder {
543567
private final String jobId;
544568
private long modelBytes;
545569
private Long peakModelBytes;
570+
private Long actualMemoryUsageBytes;
546571
private Long modelBytesExceeded;
547572
private Long modelBytesMemoryLimit;
548573
private long totalByFieldCount;
@@ -573,6 +598,7 @@ public Builder(ModelSizeStats modelSizeStats) {
573598
this.jobId = modelSizeStats.jobId;
574599
this.modelBytes = modelSizeStats.modelBytes;
575600
this.peakModelBytes = modelSizeStats.peakModelBytes;
601+
this.actualMemoryUsageBytes = modelSizeStats.actualMemoryUsageBytes;
576602
this.modelBytesExceeded = modelSizeStats.modelBytesExceeded;
577603
this.modelBytesMemoryLimit = modelSizeStats.modelBytesMemoryLimit;
578604
this.totalByFieldCount = modelSizeStats.totalByFieldCount;
@@ -603,6 +629,12 @@ public Builder setPeakModelBytes(long peakModelBytes) {
603629
return this;
604630
}
605631

632+
public Builder setActualMemoryUsageBytes(long actualMemoryUsageBytes)
633+
{
634+
this.actualMemoryUsageBytes = actualMemoryUsageBytes;
635+
return this;
636+
}
637+
606638
public Builder setModelBytesExceeded(long modelBytesExceeded) {
607639
this.modelBytesExceeded = modelBytesExceeded;
608640
return this;
@@ -700,6 +732,7 @@ public ModelSizeStats build() {
700732
jobId,
701733
modelBytes,
702734
peakModelBytes,
735+
actualMemoryUsageBytes,
703736
modelBytesExceeded,
704737
modelBytesMemoryLimit,
705738
totalByFieldCount,

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobResultsProvider.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,13 @@ void calculateEstablishedMemoryUsage(
15901590
handler.accept((storedPeak != null) ? storedPeak : latestModelSizeStats.getModelBytes());
15911591
return;
15921592
}
1593+
case ACTUAL_MEMORY_USAGE_BYTES -> {
1594+
Long storedActualMemoryUsageBytes = latestModelSizeStats.getActualMemoryUsageBytes();
1595+
handler.accept(
1596+
(storedActualMemoryUsageBytes != null) ? storedActualMemoryUsageBytes : latestModelSizeStats.getModelBytes()
1597+
);
1598+
return;
1599+
}
15931600
}
15941601
}
15951602

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/autodetect/AutodetectProcessManager.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,8 @@ public ByteSizeValue getOpenProcessMemoryUsage() {
10761076
case MODEL_MEMORY_LIMIT -> Optional.ofNullable(modelSizeStats.getModelBytesMemoryLimit()).orElse(0L);
10771077
case CURRENT_MODEL_BYTES -> modelSizeStats.getModelBytes();
10781078
case PEAK_MODEL_BYTES -> Optional.ofNullable(modelSizeStats.getPeakModelBytes()).orElse(modelSizeStats.getModelBytes());
1079+
case ACTUAL_MEMORY_USAGE_BYTES -> Optional.ofNullable(modelSizeStats.getActualMemoryUsageBytes())
1080+
.orElse(modelSizeStats.getModelBytes());
10791081
};
10801082
memoryUsedBytes += Job.PROCESS_MEMORY_OVERHEAD.getBytes();
10811083
}

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/AutodetectProcessManagerTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,10 +834,12 @@ public void testGetOpenProcessMemoryUsage() {
834834
long modelMemoryLimitBytes = ByteSizeValue.ofMb(randomIntBetween(10, 1000)).getBytes();
835835
long peakModelBytes = randomLongBetween(100000, modelMemoryLimitBytes - 1);
836836
long modelBytes = randomLongBetween(1, peakModelBytes - 1);
837+
// Bound by the memory limit (always >= 10MB here) instead of peakModelBytes - 1: peakModelBytes may be as low as 100000,
// which would make the range [262144, peakModelBytes - 1] empty and cause randomLongBetween to fail intermittently.
long actualMemoryUsageBytes = randomLongBetween(262144, modelMemoryLimitBytes - 1);
837838
AssignmentMemoryBasis assignmentMemoryBasis = randomFrom(AssignmentMemoryBasis.values());
838839
modelSizeStats = new ModelSizeStats.Builder("foo").setModelBytesMemoryLimit(modelMemoryLimitBytes)
839840
.setPeakModelBytes(peakModelBytes)
840841
.setModelBytes(modelBytes)
842+
.setActualMemoryUsageBytes(actualMemoryUsageBytes)
841843
.setAssignmentMemoryBasis(assignmentMemoryBasis)
842844
.build();
843845
when(autodetectCommunicator.getModelSizeStats()).thenReturn(modelSizeStats);
@@ -850,6 +852,7 @@ public void testGetOpenProcessMemoryUsage() {
850852
case MODEL_MEMORY_LIMIT -> modelMemoryLimitBytes;
851853
case CURRENT_MODEL_BYTES -> modelBytes;
852854
case PEAK_MODEL_BYTES -> peakModelBytes;
855+
case ACTUAL_MEMORY_USAGE_BYTES -> actualMemoryUsageBytes;
853856
};
854857
assertThat(manager.getOpenProcessMemoryUsage(), equalTo(ByteSizeValue.ofBytes(expectedSizeBytes)));
855858
}

0 commit comments

Comments
 (0)