Skip to content

Commit cd6e5ef

Browse files
Merge pull request opendatahub-io#37 from heyselbi/kserve-main
Sync main with upstream
2 parents 73beb20 + 45dde9d commit cd6e5ef

File tree

8 files changed

+176
-80
lines changed

8 files changed

+176
-80
lines changed

config/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The `examples` directory contains example Kustomization overlays to demonstrate
88

99
- `custom-example` is an example of an overlay to deploy model-mesh with a custom model-serving runtime image
1010
- `custom-example-uds` extends `custom-example` to use a unix domain socket for intra-pod communication
11-
- `type-constraints-example` is an example of a heterogeneous model-mesh deployment comprising two kubernetes Deployments with a single Service. It employs type constraints to control assignments of models to pod subsets based on laebels.
11+
- `type-constraints-example` is an example of a heterogeneous model-mesh deployment comprising two kubernetes Deployments with a single Service. It employs type constraints to control assignments of models to pod subsets based on labels.
1212

1313
The following patches are provided in `base/patches` and can be selectively included/modified in your custom overlay:
1414

src/main/java/com/ibm/watson/modelmesh/Metrics.java

Lines changed: 84 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.ibm.watson.modelmesh;
1818

19+
import com.google.common.base.Strings;
1920
import com.ibm.watson.prometheus.Counter;
2021
import com.ibm.watson.prometheus.Gauge;
2122
import com.ibm.watson.prometheus.Histogram;
@@ -36,34 +37,39 @@
3637
import org.apache.logging.log4j.LogManager;
3738
import org.apache.logging.log4j.Logger;
3839

39-
import java.lang.reflect.Array;
4040
import java.net.SocketAddress;
4141
import java.nio.channels.DatagramChannel;
42-
import java.util.*;
42+
import java.util.Collections;
43+
import java.util.EnumMap;
44+
import java.util.HashMap;
45+
import java.util.HashSet;
46+
import java.util.Map;
4347
import java.util.Map.Entry;
48+
import java.util.Set;
4449
import java.util.concurrent.Callable;
4550
import java.util.concurrent.LinkedBlockingQueue;
4651
import java.util.concurrent.TimeUnit;
4752
import java.util.stream.Stream;
4853

4954
import static com.ibm.watson.modelmesh.Metric.*;
55+
import static com.ibm.watson.modelmesh.Metric.MetricType.*;
5056
import static com.ibm.watson.modelmesh.ModelMesh.M;
5157
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_CUSTOM_ENV_VAR;
52-
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_METRICS_ENV_VAR;
5358
import static java.util.concurrent.TimeUnit.*;
5459

5560
/**
5661
*
5762
*/
5863
interface Metrics extends AutoCloseable {
64+
boolean isPerModelMetricsEnabled();
5965

6066
boolean isEnabled();
6167

6268
void logTimingMetricSince(Metric metric, long prevTime, boolean isNano);
6369

64-
void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano);
70+
void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId);
6571

66-
void logSizeEventMetric(Metric metric, long value);
72+
void logSizeEventMetric(Metric metric, long value, String modelId);
6773

6874
void logGaugeMetric(Metric metric, long value);
6975

@@ -101,7 +107,7 @@ default void logInstanceStats(final InstanceRecord ir) {
101107
* @param respPayloadSize response payload size in bytes (or -1 if not applicable)
102108
*/
103109
void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
104-
int reqPayloadSize, int respPayloadSize);
110+
int reqPayloadSize, int respPayloadSize, String modelId, String vModelId);
105111

106112
default void registerGlobals() {}
107113

@@ -111,6 +117,11 @@ default void unregisterGlobals() {}
111117
default void close() {}
112118

113119
Metrics NO_OP_METRICS = new Metrics() {
120+
@Override
121+
public boolean isPerModelMetricsEnabled() {
122+
return false;
123+
}
124+
114125
@Override
115126
public boolean isEnabled() {
116127
return false;
@@ -120,10 +131,10 @@ public boolean isEnabled() {
120131
public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {}
121132

122133
@Override
123-
public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {}
134+
public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId){}
124135

125136
@Override
126-
public void logSizeEventMetric(Metric metric, long value) {}
137+
public void logSizeEventMetric(Metric metric, long value, String modelId){}
127138

128139
@Override
129140
public void logGaugeMetric(Metric metric, long value) {}
@@ -136,7 +147,7 @@ public void logInstanceStats(InstanceRecord ir) {}
136147

137148
@Override
138149
public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
139-
int reqPayloadSize, int respPayloadSize) {}
150+
int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {}
140151
};
141152

142153
final class PrometheusMetrics implements Metrics {
@@ -154,12 +165,14 @@ final class PrometheusMetrics implements Metrics {
154165
private final CollectorRegistry registry;
155166
private final NettyServer metricServer;
156167
private final boolean shortNames;
168+
private final boolean perModelMetricsEnabled;
157169
private final EnumMap<Metric, Collector> metricsMap = new EnumMap<>(Metric.class);
158170

159171
public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMetricParams) throws Exception {
160172
int port = 2112;
161173
boolean shortNames = true;
162174
boolean https = true;
175+
boolean perModelMetricsEnabled = true;
163176
String memMetrics = "all"; // default to all
164177
for (Entry<String, String> ent : params.entrySet()) {
165178
switch (ent.getKey()) {
@@ -170,6 +183,9 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
170183
throw new Exception("Invalid metrics port: " + ent.getValue());
171184
}
172185
break;
186+
case "per_model_metrics":
187+
perModelMetricsEnabled= "true".equalsIgnoreCase(ent.getValue());
188+
break;
173189
case "fq_names":
174190
shortNames = !"true".equalsIgnoreCase(ent.getValue());
175191
break;
@@ -188,6 +204,7 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
188204
throw new Exception("Unrecognized metrics config parameter: " + ent.getKey());
189205
}
190206
}
207+
this.perModelMetricsEnabled = perModelMetricsEnabled;
191208

192209
registry = new CollectorRegistry();
193210
for (Metric m : Metric.values()) {
@@ -220,10 +237,15 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
220237
}
221238

222239
if (m == API_REQUEST_TIME || m == API_REQUEST_COUNT || m == INVOKE_MODEL_TIME
223-
|| m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE) {
224-
builder.labelNames("method", "code");
240+
|| m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE) {
241+
if (this.perModelMetricsEnabled) {
242+
builder.labelNames("method", "code", "modelId", "vModelId");
243+
} else {
244+
builder.labelNames("method", "code");
245+
}
246+
} else if (this.perModelMetricsEnabled && m.type != GAUGE && m.type != COUNTER && m.type != COUNTER_WITH_HISTO) {
247+
builder.labelNames("modelId", "vModelId");
225248
}
226-
227249
Collector collector = builder.name(m.promName).help(m.description).create();
228250
metricsMap.put(m, collector);
229251
if (!m.global) {
@@ -330,6 +352,11 @@ public void close() {
330352
this.metricServer.close();
331353
}
332354

355+
@Override
356+
public boolean isPerModelMetricsEnabled() {
357+
return perModelMetricsEnabled;
358+
}
359+
333360
@Override
334361
public boolean isEnabled() {
335362
return true;
@@ -342,13 +369,23 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {
342369
}
343370

344371
@Override
345-
public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {
346-
((Histogram) metricsMap.get(metric)).observe(isNano ? elapsed / M : elapsed);
372+
public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId) {
373+
Histogram histogram = (Histogram) metricsMap.get(metric);
374+
if (perModelMetricsEnabled && modelId != null) {
375+
histogram.labels(modelId, "").observe(isNano ? elapsed / M : elapsed);
376+
} else {
377+
histogram.observe(isNano ? elapsed / M : elapsed);
378+
}
347379
}
348380

349381
@Override
350-
public void logSizeEventMetric(Metric metric, long value) {
351-
((Histogram) metricsMap.get(metric)).observe(value * metric.newMultiplier);
382+
public void logSizeEventMetric(Metric metric, long value, String modelId) {
383+
Histogram histogram = (Histogram) metricsMap.get(metric);
384+
if (perModelMetricsEnabled) {
385+
histogram.labels(modelId, "").observe(value * metric.newMultiplier);
386+
} else {
387+
histogram.observe(value * metric.newMultiplier);
388+
}
352389
}
353390

354391
@Override
@@ -365,23 +402,37 @@ public void logCounterMetric(Metric metric) {
365402

366403
@Override
367404
public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
368-
int reqPayloadSize, int respPayloadSize) {
405+
int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {
369406
final long elapsedMillis = elapsedNanos / M;
370407
final Histogram timingHisto = (Histogram) metricsMap
371408
.get(external ? API_REQUEST_TIME : INVOKE_MODEL_TIME);
372409

373410
int idx = shortNames ? name.indexOf('/') : -1;
374-
final String methodName = idx == -1 ? name : name.substring(idx + 1);
375-
376-
timingHisto.labels(methodName, code.name()).observe(elapsedMillis);
377-
411+
String methodName = idx == -1 ? name : name.substring(idx + 1);
412+
if (perModelMetricsEnabled) {
413+
modelId = Strings.nullToEmpty(modelId);
414+
vModelId = Strings.nullToEmpty(vModelId);
415+
}
416+
if (perModelMetricsEnabled) {
417+
timingHisto.labels(methodName, code.name(), modelId, vModelId).observe(elapsedMillis);
418+
} else {
419+
timingHisto.labels(methodName, code.name()).observe(elapsedMillis);
420+
}
378421
if (reqPayloadSize != -1) {
379-
((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE))
380-
.labels(methodName, code.name()).observe(reqPayloadSize);
422+
Histogram reqPayloadHisto = (Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE);
423+
if (perModelMetricsEnabled) {
424+
reqPayloadHisto.labels(methodName, code.name(), modelId, vModelId).observe(reqPayloadSize);
425+
} else {
426+
reqPayloadHisto.labels(methodName, code.name()).observe(reqPayloadSize);
427+
}
381428
}
382429
if (respPayloadSize != -1) {
383-
((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE))
384-
.labels(methodName, code.name()).observe(respPayloadSize);
430+
Histogram respPayloadHisto = (Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE);
431+
if (perModelMetricsEnabled) {
432+
respPayloadHisto.labels(methodName, code.name(), modelId, vModelId).observe(respPayloadSize);
433+
} else {
434+
respPayloadHisto.labels(methodName, code.name()).observe(respPayloadSize);
435+
}
385436
}
386437
}
387438

@@ -437,6 +488,11 @@ protected StatsDSender createSender(Callable<SocketAddress> addressLookup, int q
437488
+ (shortNames ? "short" : "fully-qualified") + " method names");
438489
}
439490

491+
@Override
492+
public boolean isPerModelMetricsEnabled() {
493+
return false;
494+
}
495+
440496
@Override
441497
public boolean isEnabled() {
442498
return true;
@@ -454,12 +510,12 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {
454510
}
455511

456512
@Override
457-
public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {
513+
public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId) {
458514
client.recordExecutionTime(name(metric), isNano ? elapsed / M : elapsed);
459515
}
460516

461517
@Override
462-
public void logSizeEventMetric(Metric metric, long value) {
518+
public void logSizeEventMetric(Metric metric, long value, String modelId) {
463519
if (!legacy) {
464520
value *= metric.newMultiplier;
465521
}
@@ -497,7 +553,7 @@ static String[] getOkTags(String method, boolean shortName) {
497553

498554
@Override
499555
public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
500-
int reqPayloadSize, int respPayloadSize) {
556+
int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {
501557
final StatsDClient client = this.client;
502558
final long elapsedMillis = elapsedNanos / M;
503559
final String countName = name(external ? API_REQUEST_COUNT : INVOKE_MODEL_COUNT);

0 commit comments

Comments
 (0)