Skip to content

Commit 93ee2b2

Browse files
authored
feat: Add V2 Inference API support for OVMS runtime (#31)
#### Motivation

Intel's OpenVINO Model Server has historically only supported the v1 (TFS) prediction API, but may also support v2 in upcoming versions.

#### Modifications

Adjust the built-in OVMS adapter to advertise the V2 gRPC Service APIs in addition to V1.

#### Result

KServe V2 Inference API will work with OVMS in ModelMesh Serving once supported by the OVMS container.

Signed-off-by: Nick Hill <[email protected]>
1 parent 46f3b74 commit 93ee2b2

File tree

3 files changed

+10
-6
lines changed

3 files changed

+10
-6
lines changed

model-mesh-ovms-adapter/server/const.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
 package server

 const (
-	ovmsModelSubdir   string = "_ovms_models"
-	onnxModelFilename string = "model.onnx"
+	tfsGrpcServiceName      string = "tensorflow.serving.PredictionService"
+	kServeV2GrpcServiceName string = "inference.GRPCInferenceService"
+	ovmsModelSubdir         string = "_ovms_models"
+	onnxModelFilename       string = "model.onnx"
 )

model-mesh-ovms-adapter/server/server.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,14 @@ func (s *OvmsAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.Runtim
 	runtimeStatus.RuntimeVersion = s.AdapterConfig.RuntimeVersion
 	runtimeStatus.LimitModelConcurrency = s.AdapterConfig.LimitModelConcurrency > 0

-	// OVMS only supports the Predict API currently
+	path_1 := []uint32{1}
 	path_1_1 := []uint32{1, 1} // PredictRequest[model_spec][name]
 	mis := make(map[string]*mmesh.RuntimeStatusResponse_MethodInfo)
-	mis["tensorflow.serving.PredictionService/Predict"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1_1}
+	// V1 (TFS) API
+	mis[tfsGrpcServiceName+"/Predict"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1_1}
+	// KServe V2 API
+	mis[kServeV2GrpcServiceName+"/ModelInfer"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1}
+	mis[kServeV2GrpcServiceName+"/ModelMetadata"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1}
 	runtimeStatus.MethodInfos = mis

 	log.Info("runtimeStatus", "Status", runtimeStatus)

model-mesh-triton-adapter/server/server.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,6 @@ func (s *TritonAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.Runt
 	path1 := []uint32{1}

 	mis := make(map[string]*mmesh.RuntimeStatusResponse_MethodInfo)
-
-	// only support Transform for now
 	mis[tritonServiceName+"/ModelInfer"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path1}
 	mis[tritonServiceName+"/ModelMetadata"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path1}
 	runtimeStatus.MethodInfos = mis

0 commit comments

Comments (0)