// Copied from https://github.com/SeldonIO/MLServer/blob/1.2.4/proto/dataplane.proto
syntax = "proto3";

package inference;
@@ -25,6 +25,18 @@ service GRPCInferenceService
25
25
26
26
// Perform inference using a specific model.
27
27
rpc ModelInfer (ModelInferRequest ) returns (ModelInferResponse ) {}
28
+
29
+ // Get the index of model repository contents.
30
+ rpc RepositoryIndex (RepositoryIndexRequest )
31
+ returns (RepositoryIndexResponse ) {}
32
+
33
+ // Load or reload a model from a repository.
34
+ rpc RepositoryModelLoad (RepositoryModelLoadRequest )
35
+ returns (RepositoryModelLoadResponse ) {}
36
+
37
+ // Unload a model.
38
+ rpc RepositoryModelUnload (RepositoryModelUnloadRequest )
39
+ returns (RepositoryModelUnloadResponse ) {}
28
40
}
29
41
30
42
@@ -113,6 +125,10 @@ message ModelMetadataResponse
113
125
// The tensor shape. A variable-size dimension is represented
114
126
// by a -1 value.
115
127
repeated int64 shape = 3 ;
128
+
129
+ // Optional default parameters for input.
130
+ // NOTE: This is an extension to the standard
131
+ map <string , InferParameter > parameters = 4 ;
116
132
}
117
133
118
134
// The model name.
@@ -129,6 +145,10 @@ message ModelMetadataResponse
129
145
130
146
// The model's outputs.
131
147
repeated TensorMetadata outputs = 5 ;
148
+
149
+ // Optional default parameters for the request / response.
150
+ // NOTE: This is an extension to the standard
151
+ map <string , InferParameter > parameters = 6 ;
132
152
}
133
153
134
154
//
@@ -152,6 +172,8 @@ message ModelInferRequest
152
172
map <string , InferParameter > parameters = 4 ;
153
173
154
174
// The input tensor data.
175
+ // This field must not be specified if tensor contents are being specified
176
+ // in ModelInferRequest.raw_input_contents.
155
177
InferTensorContents contents = 5 ;
156
178
}
157
179
@@ -185,6 +207,25 @@ message ModelInferRequest
185
207
// The requested output tensors for the inference. Optional, if not
186
208
// specified all outputs produced by the model will be returned.
187
209
repeated InferRequestedOutputTensor outputs = 6 ;
210
+
211
+ // The data contained in an input tensor can be represented in "raw" bytes
212
+ // form or in the repeated type that matches the tensor's data type. Using
213
+ // the "raw" bytes form will typically allow higher performance due to the
214
+ // way protobuf allocation and reuse interacts with GRPC. For example, see
215
+ // https://github.com/grpc/grpc/issues/23231.
216
+ //
217
+ // To use the raw representation 'raw_input_contents' must be initialized
218
+ // with data for each tensor in the same order as 'inputs'. For each tensor,
219
+ // the size of this content must match what is expected by the tensor's shape
220
+ // and data type. The raw data must be the flattened, one-dimensional,
221
+ // row-major order of the tensor elements without any stride or padding
222
+ // between the elements. Note that the FP16 and BF16 data types must be
223
+ // represented as raw content as there is no specific data type for a 16-bit
224
+ // float type.
225
+ //
226
+ // If this field is specified then InferInputTensor::contents must not be
227
+ // specified for any input tensor.
228
+ repeated bytes raw_input_contents = 7 ;
188
229
}
189
230
190
231
message ModelInferResponse
@@ -205,6 +246,8 @@ message ModelInferResponse
205
246
map <string , InferParameter > parameters = 4 ;
206
247
207
248
// The output tensor data.
249
+ // This field must not be specified if tensor contents are being specified
250
+ // in ModelInferResponse.raw_output_contents.
208
251
InferTensorContents contents = 5 ;
209
252
}
210
253
@@ -222,6 +265,25 @@ message ModelInferResponse
222
265
223
266
// The output tensors holding inference results.
224
267
repeated InferOutputTensor outputs = 5 ;
268
+
269
+ // The data contained in an output tensor can be represented in "raw" bytes
270
+ // form or in the repeated type that matches the tensor's data type. Using
271
+ // the "raw" bytes form will typically allow higher performance due to the
272
+ // way protobuf allocation and reuse interacts with GRPC. For example, see
273
+ // https://github.com/grpc/grpc/issues/23231.
274
+ //
275
+ // To use the raw representation 'raw_output_contents' must be initialized
276
+ // with data for each tensor in the same order as 'outputs'. For each tensor,
277
+ // the size of this content must match what is expected by the tensor's shape
278
+ // and data type. The raw data must be the flattened, one-dimensional,
279
+ // row-major order of the tensor elements without any stride or padding
280
+ // between the elements. Note that the FP16 and BF16 data types must be
281
+ // represented as raw content as there is no specific data type for a 16-bit
282
+ // float type.
283
+ //
284
+ // If this field is specified then InferOutputTensor::contents must not be
285
+ // specified for any output tensor.
286
+ repeated bytes raw_output_contents = 6 ;
225
287
}
226
288
227
289
@@ -296,3 +358,98 @@ message InferTensorContents
296
358
// one-dimensional, row-major order of the tensor elements.
297
359
repeated bytes bytes_contents = 8 ;
298
360
}
//
// Messages for the Repository API
//
// NOTE: These messages previously lived in a separate protobuf
// definition. They have now been merged into the main
// GRPCInferenceService.
//
// A model repository parameter value.
message ModelRepositoryParameter
{
  // The parameter value can be a boolean, an int64, a string or bytes.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;

    // A bytes parameter value.
    bytes bytes_param = 4;
  }
}
// Request for RepositoryIndex: lists the contents of a model repository.
message RepositoryIndexRequest
{
  // Repository to index. When empty, the index is returned for all
  // repositories.
  string repository_name = 1;

  // When true, restrict the index to models currently ready for
  // inferencing.
  bool ready = 2;
}
// Response for RepositoryIndex.
message RepositoryIndexResponse
{
  // A single model entry in the repository index.
  message ModelIndex {
    // Name of the model.
    string name = 1;

    // Version of the model.
    string version = 2;

    // Current state of the model.
    string state = 3;

    // Explanation, if any, for the model being in the given state.
    string reason = 4;
  }

  // One index entry per model.
  repeated ModelIndex models = 1;
}
// Request for RepositoryModelLoad: loads or reloads a model.
message RepositoryModelLoadRequest
{
  // Repository to load from. When empty, the model may be loaded from
  // any repository.
  string repository_name = 1;

  // Name of the model to load or reload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}
// Response for RepositoryModelLoad. Intentionally empty; fields can be
// added later without breaking the RPC signature.
message RepositoryModelLoadResponse
{
}
// Request for RepositoryModelUnload: unloads a model.
message RepositoryModelUnloadRequest
{
  // Repository from which the model was originally loaded. When empty,
  // the repository is not considered.
  string repository_name = 1;

  // Name of the model to unload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}
// Response for RepositoryModelUnload. Intentionally empty; fields can be
// added later without breaking the RPC signature.
message RepositoryModelUnloadResponse
{
}