Skip to content

Commit cc3caa2

Browse files
authored
[ML] Add deployment threading details and memory usage to telemetry (#113099) (#113516)
Adds deployment threading options and a new memory section reporting the memory usage for each of the ml features # Conflicts: # server/src/main/java/org/elasticsearch/TransportVersions.java
1 parent b80aed6 commit cc3caa2

File tree

7 files changed

+338
-29
lines changed

7 files changed

+338
-29
lines changed

docs/reference/rest-api/usage.asciidoc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,13 @@ GET /_xpack/usage
195195
}
196196
}
197197
},
198-
"node_count" : 1
198+
"node_count" : 1,
199+
"memory": {
200+
anomaly_detectors_memory_bytes: 0,
201+
data_frame_analytics_memory_bytes: 0,
202+
pytorch_inference_memory_bytes: 0,
203+
total_used_memory_bytes: 0
204+
}
199205
},
200206
"inference": {
201207
"available" : true,

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,13 @@ public class MachineLearningFeatureSetUsage extends XPackFeatureSet.Usage {
3131
public static final String NODE_COUNT = "node_count";
3232
public static final String DATA_FRAME_ANALYTICS_JOBS_FIELD = "data_frame_analytics_jobs";
3333
public static final String INFERENCE_FIELD = "inference";
34+
public static final String MEMORY_FIELD = "memory";
3435

3536
private final Map<String, Object> jobsUsage;
3637
private final Map<String, Object> datafeedsUsage;
3738
private final Map<String, Object> analyticsUsage;
3839
private final Map<String, Object> inferenceUsage;
40+
private final Map<String, Object> memoryUsage;
3941
private final int nodeCount;
4042

4143
public MachineLearningFeatureSetUsage(
@@ -45,13 +47,15 @@ public MachineLearningFeatureSetUsage(
4547
Map<String, Object> datafeedsUsage,
4648
Map<String, Object> analyticsUsage,
4749
Map<String, Object> inferenceUsage,
50+
Map<String, Object> memoryUsage,
4851
int nodeCount
4952
) {
5053
super(XPackField.MACHINE_LEARNING, available, enabled);
5154
this.jobsUsage = Objects.requireNonNull(jobsUsage);
5255
this.datafeedsUsage = Objects.requireNonNull(datafeedsUsage);
5356
this.analyticsUsage = Objects.requireNonNull(analyticsUsage);
5457
this.inferenceUsage = Objects.requireNonNull(inferenceUsage);
58+
this.memoryUsage = Objects.requireNonNull(memoryUsage);
5559
this.nodeCount = nodeCount;
5660
}
5761

@@ -62,6 +66,11 @@ public MachineLearningFeatureSetUsage(StreamInput in) throws IOException {
6266
this.analyticsUsage = in.readGenericMap();
6367
this.inferenceUsage = in.readGenericMap();
6468
this.nodeCount = in.readInt();
69+
if (in.getTransportVersion().onOrAfter(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) {
70+
this.memoryUsage = in.readGenericMap();
71+
} else {
72+
this.memoryUsage = Map.of();
73+
}
6574
}
6675

6776
@Override
@@ -77,6 +86,9 @@ public void writeTo(StreamOutput out) throws IOException {
7786
out.writeGenericMap(analyticsUsage);
7887
out.writeGenericMap(inferenceUsage);
7988
out.writeInt(nodeCount);
89+
if (out.getTransportVersion().onOrAfter(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) {
90+
out.writeGenericMap(memoryUsage);
91+
}
8092
}
8193

8294
@Override
@@ -86,9 +98,51 @@ protected void innerXContent(XContentBuilder builder, Params params) throws IOEx
8698
builder.field(DATAFEEDS_FIELD, datafeedsUsage);
8799
builder.field(DATA_FRAME_ANALYTICS_JOBS_FIELD, analyticsUsage);
88100
builder.field(INFERENCE_FIELD, inferenceUsage);
101+
builder.field(MEMORY_FIELD, memoryUsage);
89102
if (nodeCount >= 0) {
90103
builder.field(NODE_COUNT, nodeCount);
91104
}
92105
}
93106

107+
public Map<String, Object> getJobsUsage() {
108+
return jobsUsage;
109+
}
110+
111+
public Map<String, Object> getDatafeedsUsage() {
112+
return datafeedsUsage;
113+
}
114+
115+
public Map<String, Object> getAnalyticsUsage() {
116+
return analyticsUsage;
117+
}
118+
119+
public Map<String, Object> getInferenceUsage() {
120+
return inferenceUsage;
121+
}
122+
123+
public Map<String, Object> getMemoryUsage() {
124+
return memoryUsage;
125+
}
126+
127+
public int getNodeCount() {
128+
return nodeCount;
129+
}
130+
131+
@Override
132+
public boolean equals(Object o) {
133+
if (this == o) return true;
134+
if (o == null || getClass() != o.getClass()) return false;
135+
MachineLearningFeatureSetUsage that = (MachineLearningFeatureSetUsage) o;
136+
return nodeCount == that.nodeCount
137+
&& Objects.equals(jobsUsage, that.jobsUsage)
138+
&& Objects.equals(datafeedsUsage, that.datafeedsUsage)
139+
&& Objects.equals(analyticsUsage, that.analyticsUsage)
140+
&& Objects.equals(inferenceUsage, that.inferenceUsage)
141+
&& Objects.equals(memoryUsage, that.memoryUsage);
142+
}
143+
144+
@Override
145+
public int hashCode() {
146+
return Objects.hash(jobsUsage, datafeedsUsage, analyticsUsage, inferenceUsage, memoryUsage, nodeCount);
147+
}
94148
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.core.ml;
9+
10+
import org.elasticsearch.TransportVersion;
11+
import org.elasticsearch.TransportVersions;
12+
import org.elasticsearch.common.io.stream.Writeable;
13+
import org.elasticsearch.core.Tuple;
14+
15+
import java.io.IOException;
16+
import java.util.Collections;
17+
18+
public class MachineLearningFeatureSetUsageTests extends AbstractBWCWireSerializationTestCase<MachineLearningFeatureSetUsage> {
19+
@Override
20+
protected Writeable.Reader<MachineLearningFeatureSetUsage> instanceReader() {
21+
return MachineLearningFeatureSetUsage::new;
22+
}
23+
24+
@Override
25+
protected MachineLearningFeatureSetUsage createTestInstance() {
26+
boolean enabled = randomBoolean();
27+
28+
if (enabled == false) {
29+
return new MachineLearningFeatureSetUsage(
30+
randomBoolean(),
31+
enabled,
32+
Collections.emptyMap(),
33+
Collections.emptyMap(),
34+
Collections.emptyMap(),
35+
Collections.emptyMap(),
36+
Collections.emptyMap(),
37+
0
38+
);
39+
} else {
40+
return new MachineLearningFeatureSetUsage(
41+
randomBoolean(),
42+
enabled,
43+
randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))),
44+
randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))),
45+
randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))),
46+
randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))),
47+
randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))),
48+
randomIntBetween(1, 10)
49+
);
50+
}
51+
}
52+
53+
@Override
54+
protected MachineLearningFeatureSetUsage mutateInstance(MachineLearningFeatureSetUsage instance) throws IOException {
55+
return null;
56+
}
57+
58+
@Override
59+
protected MachineLearningFeatureSetUsage mutateInstanceForVersion(MachineLearningFeatureSetUsage instance, TransportVersion version) {
60+
if (version.before(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) {
61+
return new MachineLearningFeatureSetUsage(
62+
instance.available(),
63+
instance.enabled(),
64+
instance.getJobsUsage(),
65+
instance.getDatafeedsUsage(),
66+
instance.getAnalyticsUsage(),
67+
instance.getInferenceUsage(),
68+
Collections.emptyMap(),
69+
instance.getNodeCount()
70+
);
71+
}
72+
73+
return instance;
74+
}
75+
}

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,28 @@ public void testStartMultipleLowPriorityDeployments() throws Exception {
11201120
}
11211121
}
11221122

1123+
@SuppressWarnings("unchecked")
1124+
public void testDeploymentThreadsIncludedInUsage() throws IOException {
1125+
String modelId = "deployment_threads_in_usage";
1126+
createPassThroughModel(modelId);
1127+
putModelDefinition(modelId);
1128+
putVocabulary(List.of("these", "are", "my", "words"), modelId);
1129+
startDeployment(modelId);
1130+
1131+
Request request = new Request("GET", "/_xpack/usage");
1132+
var usage = entityAsMap(client().performRequest(request).getEntity());
1133+
1134+
var ml = (Map<String, Object>) usage.get("ml");
1135+
assertNotNull(usage.toString(), ml);
1136+
var inference = (Map<String, Object>) ml.get("inference");
1137+
var deployments = (Map<String, Object>) inference.get("deployments");
1138+
var deploymentStats = (List<Map<String, Object>>) deployments.get("stats_by_model");
1139+
for (var stat : deploymentStats) {
1140+
assertThat(stat.toString(), (Integer) stat.get("num_threads"), greaterThanOrEqualTo(1));
1141+
assertThat(stat.toString(), (Integer) stat.get("num_allocations"), greaterThanOrEqualTo(1));
1142+
}
1143+
}
1144+
11231145
private void putModelDefinition(String modelId) throws IOException {
11241146
putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE);
11251147
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.ml.integration;
9+
10+
import org.elasticsearch.client.Request;
11+
import org.elasticsearch.test.rest.ESRestTestCase;
12+
13+
import java.io.IOException;
14+
import java.util.Map;
15+
16+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
17+
18+
// Test the phone home/telemetry data
19+
public class MlUsageIT extends ESRestTestCase {
20+
21+
@SuppressWarnings("unchecked")
22+
public void testMLUsage() throws IOException {
23+
Request request = new Request("GET", "/_xpack/usage");
24+
var usage = entityAsMap(client().performRequest(request).getEntity());
25+
26+
var ml = (Map<String, Object>) usage.get("ml");
27+
assertNotNull(usage.toString(), ml);
28+
var memoryUsage = (Map<String, Object>) ml.get("memory");
29+
assertNotNull(ml.toString(), memoryUsage);
30+
assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("anomaly_detectors_memory_bytes"), greaterThanOrEqualTo(0));
31+
assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("data_frame_analytics_memory_bytes"), greaterThanOrEqualTo(0));
32+
assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("pytorch_inference_memory_bytes"), greaterThanOrEqualTo(0));
33+
assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("total_used_memory_bytes"), greaterThanOrEqualTo(0));
34+
}
35+
}

0 commit comments

Comments
 (0)