16
16
17
17
package com .ibm .watson .modelmesh ;
18
18
19
+ import com .google .common .base .Strings ;
19
20
import com .ibm .watson .prometheus .Counter ;
20
21
import com .ibm .watson .prometheus .Gauge ;
21
22
import com .ibm .watson .prometheus .Histogram ;
36
37
import org .apache .logging .log4j .LogManager ;
37
38
import org .apache .logging .log4j .Logger ;
38
39
39
- import java .lang .reflect .Array ;
40
40
import java .net .SocketAddress ;
41
41
import java .nio .channels .DatagramChannel ;
42
- import java .util .*;
42
+ import java .util .Collections ;
43
+ import java .util .EnumMap ;
44
+ import java .util .HashMap ;
45
+ import java .util .HashSet ;
46
+ import java .util .Map ;
43
47
import java .util .Map .Entry ;
48
+ import java .util .Set ;
44
49
import java .util .concurrent .Callable ;
45
50
import java .util .concurrent .LinkedBlockingQueue ;
46
51
import java .util .concurrent .TimeUnit ;
47
52
import java .util .stream .Stream ;
48
53
49
54
import static com .ibm .watson .modelmesh .Metric .*;
55
+ import static com .ibm .watson .modelmesh .Metric .MetricType .*;
50
56
import static com .ibm .watson .modelmesh .ModelMesh .M ;
51
57
import static com .ibm .watson .modelmesh .ModelMeshEnvVars .MMESH_CUSTOM_ENV_VAR ;
52
- import static com .ibm .watson .modelmesh .ModelMeshEnvVars .MMESH_METRICS_ENV_VAR ;
53
58
import static java .util .concurrent .TimeUnit .*;
54
59
55
60
/**
56
61
*
57
62
*/
58
63
interface Metrics extends AutoCloseable {
64
+ boolean isPerModelMetricsEnabled ();
59
65
60
66
boolean isEnabled ();
61
67
62
68
void logTimingMetricSince (Metric metric , long prevTime , boolean isNano );
63
69
64
- void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano );
70
+ void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano , String modelId );
65
71
66
- void logSizeEventMetric (Metric metric , long value );
72
+ void logSizeEventMetric (Metric metric , long value , String modelId );
67
73
68
74
void logGaugeMetric (Metric metric , long value );
69
75
@@ -101,7 +107,7 @@ default void logInstanceStats(final InstanceRecord ir) {
101
107
* @param respPayloadSize response payload size in bytes (or -1 if not applicable)
102
108
*/
103
109
void logRequestMetrics (boolean external , String name , long elapsedNanos , Code code ,
104
- int reqPayloadSize , int respPayloadSize );
110
+ int reqPayloadSize , int respPayloadSize , String modelId , String vModelId );
105
111
106
112
default void registerGlobals () {}
107
113
@@ -111,6 +117,11 @@ default void unregisterGlobals() {}
111
117
default void close () {}
112
118
113
119
Metrics NO_OP_METRICS = new Metrics () {
120
+ @ Override
121
+ public boolean isPerModelMetricsEnabled () {
122
+ return false ;
123
+ }
124
+
114
125
@ Override
115
126
public boolean isEnabled () {
116
127
return false ;
@@ -120,10 +131,10 @@ public boolean isEnabled() {
120
131
public void logTimingMetricSince (Metric metric , long prevTime , boolean isNano ) {}
121
132
122
133
@ Override
123
- public void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano ) {}
134
+ public void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano , String modelId ) {}
124
135
125
136
@ Override
126
- public void logSizeEventMetric (Metric metric , long value ) {}
137
+ public void logSizeEventMetric (Metric metric , long value , String modelId ) {}
127
138
128
139
@ Override
129
140
public void logGaugeMetric (Metric metric , long value ) {}
@@ -136,7 +147,7 @@ public void logInstanceStats(InstanceRecord ir) {}
136
147
137
148
@ Override
138
149
public void logRequestMetrics (boolean external , String name , long elapsedNanos , Code code ,
139
- int reqPayloadSize , int respPayloadSize ) {}
150
+ int reqPayloadSize , int respPayloadSize , String modelId , String vModelId ) {}
140
151
};
141
152
142
153
final class PrometheusMetrics implements Metrics {
@@ -154,12 +165,14 @@ final class PrometheusMetrics implements Metrics {
154
165
private final CollectorRegistry registry ;
155
166
private final NettyServer metricServer ;
156
167
private final boolean shortNames ;
168
+ private final boolean perModelMetricsEnabled ;
157
169
private final EnumMap <Metric , Collector > metricsMap = new EnumMap <>(Metric .class );
158
170
159
171
public PrometheusMetrics (Map <String , String > params , Map <String , String > infoMetricParams ) throws Exception {
160
172
int port = 2112 ;
161
173
boolean shortNames = true ;
162
174
boolean https = true ;
175
+ boolean perModelMetricsEnabled = true ;
163
176
String memMetrics = "all" ; // default to all
164
177
for (Entry <String , String > ent : params .entrySet ()) {
165
178
switch (ent .getKey ()) {
@@ -170,6 +183,9 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
170
183
throw new Exception ("Invalid metrics port: " + ent .getValue ());
171
184
}
172
185
break ;
186
+ case "per_model_metrics" :
187
+ perModelMetricsEnabled = "true" .equalsIgnoreCase (ent .getValue ());
188
+ break ;
173
189
case "fq_names" :
174
190
shortNames = !"true" .equalsIgnoreCase (ent .getValue ());
175
191
break ;
@@ -188,6 +204,7 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
188
204
throw new Exception ("Unrecognized metrics config parameter: " + ent .getKey ());
189
205
}
190
206
}
207
+ this .perModelMetricsEnabled = perModelMetricsEnabled ;
191
208
192
209
registry = new CollectorRegistry ();
193
210
for (Metric m : Metric .values ()) {
@@ -220,10 +237,15 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
220
237
}
221
238
222
239
if (m == API_REQUEST_TIME || m == API_REQUEST_COUNT || m == INVOKE_MODEL_TIME
223
- || m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE ) {
224
- builder .labelNames ("method" , "code" );
240
+ || m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE ) {
241
+ if (this .perModelMetricsEnabled ) {
242
+ builder .labelNames ("method" , "code" , "modelId" , "vModelId" );
243
+ } else {
244
+ builder .labelNames ("method" , "code" );
245
+ }
246
+ } else if (this .perModelMetricsEnabled && m .type != GAUGE && m .type != COUNTER && m .type != COUNTER_WITH_HISTO ) {
247
+ builder .labelNames ("modelId" , "vModelId" );
225
248
}
226
-
227
249
Collector collector = builder .name (m .promName ).help (m .description ).create ();
228
250
metricsMap .put (m , collector );
229
251
if (!m .global ) {
@@ -330,6 +352,11 @@ public void close() {
330
352
this .metricServer .close ();
331
353
}
332
354
355
+ @ Override
356
+ public boolean isPerModelMetricsEnabled () {
357
+ return perModelMetricsEnabled ;
358
+ }
359
+
333
360
@ Override
334
361
public boolean isEnabled () {
335
362
return true ;
@@ -342,13 +369,23 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {
342
369
}
343
370
344
371
@ Override
345
- public void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano ) {
346
- ((Histogram ) metricsMap .get (metric )).observe (isNano ? elapsed / M : elapsed );
372
+ public void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano , String modelId ) {
373
+ Histogram histogram = (Histogram ) metricsMap .get (metric );
374
+ if (perModelMetricsEnabled && modelId != null ) {
375
+ histogram .labels (modelId , "" ).observe (isNano ? elapsed / M : elapsed );
376
+ } else {
377
+ histogram .observe (isNano ? elapsed / M : elapsed );
378
+ }
347
379
}
348
380
349
381
@ Override
350
- public void logSizeEventMetric (Metric metric , long value ) {
351
- ((Histogram ) metricsMap .get (metric )).observe (value * metric .newMultiplier );
382
+ public void logSizeEventMetric (Metric metric , long value , String modelId ) {
383
+ Histogram histogram = (Histogram ) metricsMap .get (metric );
384
+ if (perModelMetricsEnabled ) {
385
+ histogram .labels (modelId , "" ).observe (value * metric .newMultiplier );
386
+ } else {
387
+ histogram .observe (value * metric .newMultiplier );
388
+ }
352
389
}
353
390
354
391
@ Override
@@ -365,23 +402,37 @@ public void logCounterMetric(Metric metric) {
365
402
366
403
@ Override
367
404
public void logRequestMetrics (boolean external , String name , long elapsedNanos , Code code ,
368
- int reqPayloadSize , int respPayloadSize ) {
405
+ int reqPayloadSize , int respPayloadSize , String modelId , String vModelId ) {
369
406
final long elapsedMillis = elapsedNanos / M ;
370
407
final Histogram timingHisto = (Histogram ) metricsMap
371
408
.get (external ? API_REQUEST_TIME : INVOKE_MODEL_TIME );
372
409
373
410
int idx = shortNames ? name .indexOf ('/' ) : -1 ;
374
- final String methodName = idx == -1 ? name : name .substring (idx + 1 );
375
-
376
- timingHisto .labels (methodName , code .name ()).observe (elapsedMillis );
377
-
411
+ String methodName = idx == -1 ? name : name .substring (idx + 1 );
412
+ if (perModelMetricsEnabled ) {
413
+ modelId = Strings .nullToEmpty (modelId );
414
+ vModelId = Strings .nullToEmpty (vModelId );
415
+ }
416
+ if (perModelMetricsEnabled ) {
417
+ timingHisto .labels (methodName , code .name (), modelId , vModelId ).observe (elapsedMillis );
418
+ } else {
419
+ timingHisto .labels (methodName , code .name ()).observe (elapsedMillis );
420
+ }
378
421
if (reqPayloadSize != -1 ) {
379
- ((Histogram ) metricsMap .get (REQUEST_PAYLOAD_SIZE ))
380
- .labels (methodName , code .name ()).observe (reqPayloadSize );
422
+ Histogram reqPayloadHisto = (Histogram ) metricsMap .get (REQUEST_PAYLOAD_SIZE );
423
+ if (perModelMetricsEnabled ) {
424
+ reqPayloadHisto .labels (methodName , code .name (), modelId , vModelId ).observe (reqPayloadSize );
425
+ } else {
426
+ reqPayloadHisto .labels (methodName , code .name ()).observe (reqPayloadSize );
427
+ }
381
428
}
382
429
if (respPayloadSize != -1 ) {
383
- ((Histogram ) metricsMap .get (RESPONSE_PAYLOAD_SIZE ))
384
- .labels (methodName , code .name ()).observe (respPayloadSize );
430
+ Histogram respPayloadHisto = (Histogram ) metricsMap .get (RESPONSE_PAYLOAD_SIZE );
431
+ if (perModelMetricsEnabled ) {
432
+ respPayloadHisto .labels (methodName , code .name (), modelId , vModelId ).observe (respPayloadSize );
433
+ } else {
434
+ respPayloadHisto .labels (methodName , code .name ()).observe (respPayloadSize );
435
+ }
385
436
}
386
437
}
387
438
@@ -437,6 +488,11 @@ protected StatsDSender createSender(Callable<SocketAddress> addressLookup, int q
437
488
+ (shortNames ? "short" : "fully-qualified" ) + " method names" );
438
489
}
439
490
491
+ @ Override
492
+ public boolean isPerModelMetricsEnabled () {
493
+ return false ;
494
+ }
495
+
440
496
@ Override
441
497
public boolean isEnabled () {
442
498
return true ;
@@ -454,12 +510,12 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {
454
510
}
455
511
456
512
@ Override
457
- public void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano ) {
513
+ public void logTimingMetricDuration (Metric metric , long elapsed , boolean isNano , String modelId ) {
458
514
client .recordExecutionTime (name (metric ), isNano ? elapsed / M : elapsed );
459
515
}
460
516
461
517
@ Override
462
- public void logSizeEventMetric (Metric metric , long value ) {
518
+ public void logSizeEventMetric (Metric metric , long value , String modelId ) {
463
519
if (!legacy ) {
464
520
value *= metric .newMultiplier ;
465
521
}
@@ -497,7 +553,7 @@ static String[] getOkTags(String method, boolean shortName) {
497
553
498
554
@ Override
499
555
public void logRequestMetrics (boolean external , String name , long elapsedNanos , Code code ,
500
- int reqPayloadSize , int respPayloadSize ) {
556
+ int reqPayloadSize , int respPayloadSize , String modelId , String vModelId ) {
501
557
final StatsDClient client = this .client ;
502
558
final long elapsedMillis = elapsedNanos / M ;
503
559
final String countName = name (external ? API_REQUEST_COUNT : INVOKE_MODEL_COUNT );
0 commit comments