Skip to content

Commit d1d0156

Browse files
authored
feat: Allow custom prometheus info metric (opendatahub-io#83)
#### Motivation Allow the generation of an "info" metric from environment variables, exposed as a `Gauge` metric that is set up during container startup. #### Modifications - Added the constant `MMESH_CUSTOM_ENV_VAR` (environment variable `MM_INFO_METRICS`) in `ModelMeshEnvVars` - Added the map `infoMetricParams` and the logic to parse it and pass the info to Prometheus in `ModelMesh` - Added a `Gauge` using the parsed label names and values in `Metrics` - Added sample variables to `pom.xml` for testing and a related test in `ModelMeshMetricsTest` #### Result - Support for passing information via environment variables, parsed as `<metricname>[;label1=envVarWithValueforLabel1,label2=envVarWithValueforLabel2,...,labelN=envVarWithValueforLabelN,]` if provided. Signed-off-by: Rafael Vasquez <[email protected]>
1 parent eb384db commit d1d0156

File tree

5 files changed

+77
-12
lines changed

5 files changed

+77
-12
lines changed

pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,13 @@
164164
</argLine>
165165
<!-- required to workaround issue with openjdk 8u181-b13-2 -->
166166
<useSystemClassLoader>false</useSystemClassLoader>
167+
<environmentVariables>
168+
<MM_INFO_METRICS>assistant_deployment_info:relabel;deployment=DEPLOYMENT_NAME,slot=SLOT_NAME,component=COMPONENT_NAME,group=GROUP_NAME</MM_INFO_METRICS>
169+
<DEPLOYMENT_NAME>ga-tf-mm</DEPLOYMENT_NAME>
170+
<SLOT_NAME>ga</SLOT_NAME>
171+
<COMPONENT_NAME>tf-mm</COMPONENT_NAME>
172+
<GROUP_NAME>clu</GROUP_NAME>
173+
</environmentVariables>
167174
</configuration>
168175
</plugin>
169176

src/main/java/com/ibm/watson/modelmesh/Metrics.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,20 @@
3636
import org.apache.logging.log4j.LogManager;
3737
import org.apache.logging.log4j.Logger;
3838

39+
import java.lang.reflect.Array;
3940
import java.net.SocketAddress;
4041
import java.nio.channels.DatagramChannel;
41-
import java.util.Collections;
42-
import java.util.EnumMap;
43-
import java.util.HashMap;
44-
import java.util.HashSet;
45-
import java.util.Map;
42+
import java.util.*;
4643
import java.util.Map.Entry;
47-
import java.util.Set;
4844
import java.util.concurrent.Callable;
4945
import java.util.concurrent.LinkedBlockingQueue;
5046
import java.util.concurrent.TimeUnit;
47+
import java.util.stream.Stream;
5148

5249
import static com.ibm.watson.modelmesh.Metric.*;
5350
import static com.ibm.watson.modelmesh.ModelMesh.M;
51+
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_CUSTOM_ENV_VAR;
52+
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_METRICS_ENV_VAR;
5453
import static java.util.concurrent.TimeUnit.*;
5554

5655
/**
@@ -150,12 +149,14 @@ final class PrometheusMetrics implements Metrics {
150149
5000, 10000, 20000, 60000, 120000, 300000
151150
};
152151

152+
private static final int INFO_METRICS_MAX = 5;
153+
153154
private final CollectorRegistry registry;
154155
private final NettyServer metricServer;
155156
private final boolean shortNames;
156157
private final EnumMap<Metric, Collector> metricsMap = new EnumMap<>(Metric.class);
157158

158-
public PrometheusMetrics(Map<String, String> params) throws Exception {
159+
public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMetricParams) throws Exception {
159160
int port = 2112;
160161
boolean shortNames = true;
161162
boolean https = true;
@@ -230,6 +231,24 @@ public PrometheusMetrics(Map<String, String> params) throws Exception {
230231
}
231232
}
232233

234+
if (infoMetricParams != null && !infoMetricParams.isEmpty()){
235+
if (infoMetricParams.size() > INFO_METRICS_MAX) {
236+
throw new Exception("Too many info metrics provided in env var " + MMESH_CUSTOM_ENV_VAR + ": \""
237+
+ infoMetricParams+ "\". The max is " + INFO_METRICS_MAX);
238+
}
239+
240+
String metric_name = infoMetricParams.remove("metric_name");
241+
String[] labelNames = infoMetricParams.keySet().toArray(String[]::new);
242+
String[] labelValues = Stream.of(labelNames).map(infoMetricParams::get).toArray(String[]::new);
243+
Gauge infoMetricsGauge = Gauge.build()
244+
.name(metric_name)
245+
.help("Info Metrics")
246+
.labelNames(labelNames)
247+
.create();
248+
infoMetricsGauge.labels(labelValues).set(1.0);
249+
registry.register(infoMetricsGauge);
250+
}
251+
233252
this.metricServer = new NettyServer(registry, port, https);
234253
this.shortNames = shortNames;
235254

src/main/java/com/ibm/watson/modelmesh/ModelMesh.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -925,7 +925,9 @@ protected final TProcessor initialize() throws Exception {
925925
// }
926926

927927
// "type" or "type:p1=v1;p2=v2;...;pn=vn"
928-
private static final Pattern METRICS_CONFIG_PATT = Pattern.compile("([a-z]+)(:\\w+=[^;]+(?:;\\w+=[^;]+)*)?");
928+
private static final Pattern METRICS_CONFIG_PATT = Pattern.compile("([a-z;]+)(:\\w+=[^;]+(?:;\\w+=[^;]+)*)?");
929+
// "metric_name" or "metric:name;l1=v1,l2=v2,...,ln=vn,"
930+
private static final Pattern CUSTOM_METRIC_CONFIG_PATT = Pattern.compile("([a-z_:]+);(\\w+=[^;]+(?:;\\w+=[^,]+)*)?");
929931

930932
private static Metrics setUpMetrics() throws Exception {
931933
if (System.getenv("MM_METRICS_STATSD_PORT") != null || System.getenv("MM_METRICS_PROMETHEUS_PORT") != null) {
@@ -958,12 +960,38 @@ private static Metrics setUpMetrics() throws Exception {
958960
params.put(kv[0], kv[1]);
959961
}
960962
}
963+
String infoMetricConfig = getStringParameter(MMESH_CUSTOM_ENV_VAR, null);
964+
Map<String, String> infoMetricParams;
965+
if (infoMetricConfig == null) {
966+
logger.info("{} returned null", MMESH_CUSTOM_ENV_VAR);
967+
infoMetricParams = null;
968+
} else {
969+
logger.info("{} set to \"{}\"", MMESH_CUSTOM_ENV_VAR, infoMetricConfig);
970+
Matcher infoMetricMatcher = CUSTOM_METRIC_CONFIG_PATT.matcher(infoMetricConfig);
971+
if (!infoMetricMatcher.matches()) {
972+
throw new Exception("Invalid metrics configuration provided in env var " + MMESH_CUSTOM_ENV_VAR + ": \""
973+
+ infoMetricConfig + "\"");
974+
}
975+
String infoMetricName = infoMetricMatcher.group(1);
976+
String infoMetricParamString = infoMetricMatcher.group(2);
977+
infoMetricParams = new HashMap<>();
978+
infoMetricParams.put("metric_name", infoMetricName);
979+
for (String infoMetricParam : infoMetricParamString.substring(0).split(",")) {
980+
String[] kv = infoMetricParam.split("=");
981+
String value = System.getenv(kv[1]);
982+
if (value == null) {
983+
throw new Exception("Env var " + kv[1] + " is unresolved in " + MMESH_CUSTOM_ENV_VAR + ": \""
984+
+ infoMetricConfig + "\"");
985+
}
986+
infoMetricParams.put(kv[0], value);
987+
}
988+
}
961989
try {
962990
switch (type.toLowerCase()) {
963991
case "statsd":
964992
return new Metrics.StatsDMetrics(params);
965993
case "prometheus":
966-
return new Metrics.PrometheusMetrics(params);
994+
return new Metrics.PrometheusMetrics(params, infoMetricParams);
967995
case "disabled":
968996
logger.info("Metrics publishing is disabled (env var {}={})", MMESH_METRICS_ENV_VAR, metricsConfig);
969997
return Metrics.NO_OP_METRICS;

src/main/java/com/ibm/watson/modelmesh/ModelMeshEnvVars.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ private ModelMeshEnvVars() {}
5050
public static final String LOAD_FAILURE_EXPIRY_ENV_VAR = "MM_LOAD_FAILURE_EXPIRY_TIME_MS";
5151

5252
public static final String MMESH_METRICS_ENV_VAR = "MM_METRICS";
53+
public static final String MMESH_CUSTOM_ENV_VAR = "MM_INFO_METRICS";
5354

5455
public static final String LOG_EACH_INVOKE_ENV_VAR = "MM_LOG_EACH_INVOKE";
5556
public static final String SEND_DEST_ID_ENV_VAR = "MM_SEND_DEST_ID";

src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ protected int requestCount() {
6868

6969
static final String SCHEME = "https"; // or http
7070

71+
static final String METRIC_NAME = "assistant_deployment_info:relabel";
72+
static final String DEPLOYMENT_NAME = "ga-tf-mm";
73+
static final String SLOT_NAME = "ga";
74+
static final String COMPONENT_NAME = "tf-mm";
75+
static final String GROUP_NAME = "clu";
76+
7177
@Override
7278
protected Map<String, String> extraEnvVars() {
7379
return ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME);
@@ -84,7 +90,7 @@ public void metricsTest() throws Exception {
8490

8591
// verify not found status
8692
ModelStatusInfo status = manageModels.getModelStatus(GetStatusRequest.newBuilder()
87-
.setModelId("i don't exist").build());
93+
.setModelId("I don't exist").build());
8894

8995
assertEquals(ModelStatus.NOT_FOUND, status.getStatus());
9096
assertEquals(0, status.getErrorsCount());
@@ -166,7 +172,6 @@ public void verifyMetrics() throws Exception {
166172
.filter(Matcher::matches)
167173
.collect(Collectors.toMap(m -> m.group(1), m -> Double.parseDouble(m.group(2))));
168174

169-
170175
System.out.println(metrics.size() + " metrics scraped");
171176

172177
// Spot check some expected metrics and values
@@ -198,5 +203,10 @@ public void verifyMetrics() throws Exception {
198203
assertEquals(0.0, metrics.get("jvm_buffer_pool_used_buffers{pool=\"mapped\",}")); // mmapped memory not used
199204
assertTrue(metrics.containsKey("jvm_gc_collection_seconds_sum{gc=\"G1 Young Generation\",}"));
200205
assertTrue(metrics.containsKey("jvm_memory_bytes_committed{area=\"heap\",}"));
206+
207+
// Info metrics
208+
assertEquals(1.0, metrics.get(METRIC_NAME + "{component=\"" + COMPONENT_NAME
209+
+ "\",slot=\"" + SLOT_NAME + "\",deployment=\"" + DEPLOYMENT_NAME + "\",group=\"" + GROUP_NAME + "\",}"));
201210
}
202-
}
211+
212+
}

0 commit comments

Comments
 (0)