Commit f9dc1dc

chore: Update MLServer protobuf (#45)
#### Motivation

Related to [updating the MLServer runtime image](kserve/modelmesh-serving#355), the `ModelRepository` endpoint was deprecated.

References: kserve/modelmesh-serving#159, SeldonIO/MLServer#616

#### Modifications

- Updated protobuf
- Updated mock server testing
- Updated runtime-adapter code to call new endpoint

#### Result

- MLServer runtime adapter no longer uses the deprecated model repository API

Signed-off-by: Rafael Vasquez <[email protected]>
1 parent 600f092 commit f9dc1dc
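The core of the adapter-side change is that model load, unload, and index calls now go through the merged GRPCInferenceService instead of the deprecated ModelRepositoryService. The following is a minimal sketch of what a load call against the regenerated stubs can look like; the import path, the `mlserver` alias, the endpoint address, and the model name are illustrative assumptions based on standard protoc-gen-go / protoc-gen-go-grpc naming, not the adapter's actual code.

```go
// Hypothetical sketch: loading a model through the merged GRPCInferenceService.
// Import path and generated identifiers are assumptions, not copied from the repo.
package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	mlserver "github.com/kserve/modelmesh-runtime-adapter/internal/proto/mlserver/dataplane"
)

func main() {
	// Connect to the MLServer gRPC endpoint (address is illustrative).
	conn, err := grpc.Dial("localhost:8081",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("dial: %v", err)
	}
	defer conn.Close()

	// A single client now covers both inference and repository operations.
	client := mlserver.NewGRPCInferenceServiceClient(conn)

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Load (or reload) a model by name; this replaces the call that previously
	// went to the deprecated ModelRepositoryService.
	if _, err := client.RepositoryModelLoad(ctx, &mlserver.RepositoryModelLoadRequest{
		ModelName: "example-model",
	}); err != nil {
		log.Fatalf("RepositoryModelLoad failed: %v", err)
	}
	log.Println("model loaded")
}
```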

File tree

10 files changed (+1631, -1513 lines)

internal/proto/mlserver/dataplane/dataplane.pb.go

Lines changed: 1102 additions & 288 deletions
Some generated files are not rendered by default.

internal/proto/mlserver/dataplane/dataplane.proto

Lines changed: 158 additions & 1 deletion
@@ -1,4 +1,4 @@
-// Copied from https://github.com/SeldonIO/MLServer/blob/9a7c77f93dd61ac395e389280195d8adc413aad7/proto/dataplane.proto
+// Copied from https://github.com/SeldonIO/MLServer/blob/1.2.4/proto/dataplane.proto
 syntax = "proto3";
 
 package inference;
@@ -25,6 +25,18 @@ service GRPCInferenceService
 
   // Perform inference using a specific model.
   rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
+
+  // Get the index of model repository contents.
+  rpc RepositoryIndex(RepositoryIndexRequest)
+      returns (RepositoryIndexResponse) {}
+
+  // Load or reload a model from a repository.
+  rpc RepositoryModelLoad(RepositoryModelLoadRequest)
+      returns (RepositoryModelLoadResponse) {}
+
+  // Unload a model.
+  rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
+      returns (RepositoryModelUnloadResponse) {}
 }
 
 
@@ -113,6 +125,10 @@ message ModelMetadataResponse
     // The tensor shape. A variable-size dimension is represented
     // by a -1 value.
     repeated int64 shape = 3;
+
+    // Optional default parameters for input.
+    // NOTE: This is an extension to the standard
+    map<string, InferParameter> parameters = 4;
   }
 
   // The model name.
@@ -129,6 +145,10 @@ message ModelMetadataResponse
 
   // The model's outputs.
   repeated TensorMetadata outputs = 5;
+
+  // Optional default parameters for the request / response.
+  // NOTE: This is an extension to the standard
+  map<string, InferParameter> parameters = 6;
 }
 
 //
@@ -152,6 +172,8 @@ message ModelInferRequest
     map<string, InferParameter> parameters = 4;
 
     // The input tensor data.
+    // This field must not be specified if tensor contents are being specified
+    // in ModelInferRequest.raw_input_contents.
     InferTensorContents contents = 5;
   }
 
@@ -185,6 +207,25 @@ message ModelInferRequest
   // The requested output tensors for the inference. Optional, if not
   // specified all outputs produced by the model will be returned.
   repeated InferRequestedOutputTensor outputs = 6;
+
+  // The data contained in an input tensor can be represented in "raw" bytes
+  // form or in the repeated type that matches the tensor's data type. Using
+  // the "raw" bytes form will typically allow higher performance due to the
+  // way protobuf allocation and reuse interacts with GRPC. For example, see
+  // https://github.com/grpc/grpc/issues/23231.
+  //
+  // To use the raw representation 'raw_input_contents' must be initialized
+  // with data for each tensor in the same order as 'inputs'. For each tensor,
+  // the size of this content must match what is expected by the tensor's shape
+  // and data type. The raw data must be the flattened, one-dimensional,
+  // row-major order of the tensor elements without any stride or padding
+  // between the elements. Note that the FP16 and BF16 data types must be
+  // represented as raw content as there is no specific data type for a 16-bit
+  // float type.
+  //
+  // If this field is specified then InferInputTensor::contents must not be
+  // specified for any input tensor.
+  repeated bytes raw_input_contents = 7;
 }
 
 message ModelInferResponse
@@ -205,6 +246,8 @@ message ModelInferResponse
     map<string, InferParameter> parameters = 4;
 
     // The output tensor data.
+    // This field must not be specified if tensor contents are being specified
+    // in ModelInferResponse.raw_output_contents.
     InferTensorContents contents = 5;
   }
 
@@ -222,6 +265,25 @@ message ModelInferResponse
 
   // The output tensors holding inference results.
   repeated InferOutputTensor outputs = 5;
+
+  // The data contained in an output tensor can be represented in "raw" bytes
+  // form or in the repeated type that matches the tensor's data type. Using
+  // the "raw" bytes form will typically allow higher performance due to the
+  // way protobuf allocation and reuse interacts with GRPC. For example, see
+  // https://github.com/grpc/grpc/issues/23231.
+  //
+  // To use the raw representation 'raw_output_contents' must be initialized
+  // with data for each tensor in the same order as 'outputs'. For each tensor,
+  // the size of this content must match what is expected by the tensor's shape
+  // and data type. The raw data must be the flattened, one-dimensional,
+  // row-major order of the tensor elements without any stride or padding
+  // between the elements. Note that the FP16 and BF16 data types must be
+  // represented as raw content as there is no specific data type for a 16-bit
+  // float type.
+  //
+  // If this field is specified then InferOutputTensor::contents must not be
+  // specified for any output tensor.
+  repeated bytes raw_output_contents = 6;
 }
 
 
@@ -296,3 +358,98 @@ message InferTensorContents
   // one-dimensional, row-major order of the tensor elements.
   repeated bytes bytes_contents = 8;
 }
+
+//
+// Messages for the Repository API
+//
+// NOTE: These messages used to exist previously on a different protobuf
+// definition. However, they have now been merged with the main
+// GRPCInferenceService.
+//
+
+
+// An model repository parameter value.
+message ModelRepositoryParameter
+{
+  // The parameter value can be a string, an int64 or a boolean
+  oneof parameter_choice
+  {
+    // A boolean parameter value.
+    bool bool_param = 1;
+
+    // An int64 parameter value.
+    int64 int64_param = 2;
+
+    // A string parameter value.
+    string string_param = 3;
+
+    // A bytes parameter value.
+    bytes bytes_param = 4;
+  }
+}
+
+
+message RepositoryIndexRequest
+{
+  // The name of the repository. If empty the index is returned
+  // for all repositories.
+  string repository_name = 1;
+
+  // If true return only models currently ready for inferencing.
+  bool ready = 2;
+}
+
+message RepositoryIndexResponse
+{
+  // Index entry for a model.
+  message ModelIndex {
+    // The name of the model.
+    string name = 1;
+
+    // The version of the model.
+    string version = 2;
+
+    // The state of the model.
+    string state = 3;
+
+    // The reason, if any, that the model is in the given state.
+    string reason = 4;
+  }
+
+  // An index entry for each model.
+  repeated ModelIndex models = 1;
+}
+
+message RepositoryModelLoadRequest
+{
+  // The name of the repository to load from. If empty the model
+  // is loaded from any repository.
+  string repository_name = 1;
+
+  // The name of the model to load, or reload.
+  string model_name = 2;
+
+  // Optional model repository request parameters.
+  map<string, ModelRepositoryParameter> parameters = 3;
+}
+
+message RepositoryModelLoadResponse
+{
+}
+
+message RepositoryModelUnloadRequest
+{
+  // The name of the repository from which the model was originally
+  // loaded. If empty the repository is not considered.
+  string repository_name = 1;
+
+  // The name of the model to unload.
+  string model_name = 2;
+
+  // Optional model repository request parameters.
+  map<string, ModelRepositoryParameter> parameters = 3;
+}
+
+message RepositoryModelUnloadResponse
+{
+}
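Beyond the repository RPCs, the refreshed proto also documents the raw_input_contents / raw_output_contents fields and their mutual exclusivity with per-tensor contents. The sketch below shows how a request using the raw form can be assembled with the regenerated Go types; the package alias, the nested type names, and the little-endian FP32 encoding are assumptions based on standard protoc-gen-go conventions and the v2 inference protocol, not code from this repository.

```go
// Hypothetical sketch: building a ModelInferRequest that carries its tensor
// data in raw_input_contents instead of InferTensorContents.
package example

import (
	"encoding/binary"
	"math"

	mlserver "github.com/kserve/modelmesh-runtime-adapter/internal/proto/mlserver/dataplane"
)

// rawFP32 flattens a row-major float32 slice into little-endian bytes, the
// flattened layout described by the raw_input_contents comment above
// (FP32 element encoding assumed).
func rawFP32(vals []float32) []byte {
	buf := make([]byte, 4*len(vals))
	for i, v := range vals {
		binary.LittleEndian.PutUint32(buf[4*i:], math.Float32bits(v))
	}
	return buf
}

// buildRequest returns a request with one FP32 input of shape [1, 4].
func buildRequest() *mlserver.ModelInferRequest {
	return &mlserver.ModelInferRequest{
		ModelName: "example-model",
		Inputs: []*mlserver.ModelInferRequest_InferInputTensor{{
			Name:     "input-0",
			Datatype: "FP32",
			Shape:    []int64{1, 4},
			// Contents is left unset: the proto comment says it must not be
			// specified when raw_input_contents is used.
		}},
		// One raw buffer per input tensor, in the same order as Inputs.
		RawInputContents: [][]byte{rawFP32([]float32{0.1, 0.2, 0.3, 0.4})},
	}
}
```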

internal/proto/mlserver/dataplane/dataplane_grpc.pb.go

Lines changed: 115 additions & 1 deletion
Some generated files are not rendered by default.
