Commit 9a61ddc

feat: TorchServe support (#34)
#### Motivation

The Triton runtime can be used with model-mesh to serve PyTorch TorchScript models, but it does not support arbitrary PyTorch models, i.e. eager mode. KServe "classic" has an integration with TorchServe, and it would be good to have one for model-mesh too so that these kinds of models can be used in distributed multi-model serving contexts.

#### Modifications

- Add adapter logic that implements the model-mesh management SPI using the TorchServe gRPC management API
- Build and include the new adapter binary in the Docker image
- Add a mock server and basic unit tests

Implementation notes:

- Model size (memory usage) is not returned from the `LoadModel` RPC but is instead determined separately in the `ModelSize` RPC, so that the model is available for use slightly sooner
- TorchServe's `DescribeModel` RPC is used to determine the model's memory usage. If that isn't successful, the adapter falls back to a multiple of the model's size on disk (similar to other runtimes)
- The adapter writes the config file for TorchServe to consume

TorchServe does not yet support the KServe V2 gRPC prediction API (only REST), which means that API can't currently be used with model-mesh. The native TorchServe gRPC inference interface can be used instead for the time being.

A smaller PR will be opened against the main modelmesh-serving controller repo to enable use of TorchServe, including the ServingRuntime specification.

#### Result

TorchServe can be used seamlessly with ModelMesh Serving to serve PyTorch models, including eager-mode models.

Resolves kserve#4
Contributes to kserve/modelmesh-serving#63

Signed-off-by: Nick Hill <[email protected]>
1 parent f4c43a3 commit 9a61ddc
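
The size-determination fallback described in the implementation notes could look roughly like the sketch below. It is illustrative only: the function names, the disk-size multiplier value, and the way the `DescribeModel` result is obtained are assumptions, not the adapter's actual code.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

// Assumed multiplier applied to the on-disk size when memory usage can't be
// queried from TorchServe; other runtime adapters use a similar heuristic.
const diskSizeMultiplier = 3

// modelMemSize returns the model's memory usage in bytes, preferring the value
// reported by TorchServe (describeFn stands in for a DescribeModel call) and
// falling back to a multiple of the model's size on disk.
func modelMemSize(ctx context.Context, modelPath string, describeFn func(context.Context) (uint64, error)) (uint64, error) {
	if mem, err := describeFn(ctx); err == nil && mem > 0 {
		return mem, nil
	}
	diskSize, err := dirSize(modelPath)
	if err != nil {
		return 0, fmt.Errorf("could not determine model size: %w", err)
	}
	return diskSize * diskSizeMultiplier, nil
}

// dirSize sums the sizes of all regular files under path.
func dirSize(path string) (uint64, error) {
	var total uint64
	err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.Mode().IsRegular() {
			total += uint64(info.Size())
		}
		return nil
	})
	return total, err
}

func main() {
	// Stub DescribeModel call that fails, forcing the disk-size fallback.
	describe := func(context.Context) (uint64, error) { return 0, errors.New("unavailable") }
	size, err := modelMemSize(context.Background(), ".", describe)
	fmt.Println(size, err)
}
```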

File tree

18 files changed: +3366 −1 lines changed


Dockerfile

Lines changed: 3 additions & 1 deletion
@@ -86,7 +86,7 @@ RUN go build -o puller model-serving-puller/main.go
 RUN go build -o triton-adapter model-mesh-triton-adapter/main.go
 RUN go build -o mlserver-adapter model-mesh-mlserver-adapter/main.go
 RUN go build -o ovms-adapter model-mesh-ovms-adapter/main.go
-
+RUN go build -o torchserve-adapter model-mesh-torchserve-adapter/main.go
 
 ###############################################################################
 # Stage 3: Copy build assets to create the smallest final runtime image
@@ -121,6 +121,8 @@ COPY --from=build /opt/app/triton-adapter /opt/app/
 COPY --from=build /opt/app/mlserver-adapter /opt/app/
 COPY --from=build /opt/app/model-mesh-triton-adapter/scripts/tf_pb.py /opt/scripts/
 COPY --from=build /opt/app/ovms-adapter /opt/app/
+COPY --from=build /opt/app/torchserve-adapter /opt/app/
+
 
 # Don't define an entrypoint. This is a multi-purpose image so the user should specify which binary they want to run (e.g. /opt/app/puller or /opt/app/triton-adapter)
 # ENTRYPOINT ["/opt/app/puller"]

internal/proto/torchserve/inference.pb.go

Lines changed: 330 additions & 0 deletions
Some generated files are not rendered by default.

internal/proto/torchserve/inference.proto

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
// Copied from https://github.com/pytorch/serve/blob/8c23585d2453f230c411721028ad4b07e58cc7dd/frontend/server/src/main/resources/proto/inference.proto

syntax = "proto3";

package org.pytorch.serve.grpc.inference;

import "google/protobuf/empty.proto";

option java_multiple_files = true;

message PredictionsRequest {
    // Name of model.
    string model_name = 1; //required

    // Version of model to run prediction on.
    string model_version = 2; //optional

    // input data for model prediction
    map<string, bytes> input = 3; //required
}

message PredictionResponse {
    // TorchServe health
    bytes prediction = 1;
}

message TorchServeHealthResponse {
    // TorchServe health
    string health = 1;
}

service InferenceAPIsService {
    rpc Ping(google.protobuf.Empty) returns (TorchServeHealthResponse) {}

    // Predictions entry point to get inference using default model version.
    rpc Predictions(PredictionsRequest) returns (PredictionResponse) {}
}
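
Because TorchServe doesn't yet expose the KServe V2 gRPC prediction API, inference requests go through this native interface. Below is a rough client sketch against Go stubs generated from the proto above; the import path, address/port, model name, and input key are assumptions for illustration, not part of this commit.

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	// Assumed import path for the generated stubs included in this commit.
	torchserve "github.com/kserve/modelmesh-runtime-adapter/internal/proto/torchserve"
)

func main() {
	// 7070 is TorchServe's default gRPC inference port; adjust as needed.
	conn, err := grpc.Dial("localhost:7070", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := torchserve.NewInferenceAPIsServiceClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// The "data" key and JSON payload are placeholders; the expected input
	// depends on the model's handler.
	resp, err := client.Predictions(ctx, &torchserve.PredictionsRequest{
		ModelName: "my-model",
		Input:     map[string][]byte{"data": []byte(`{"instances": [[1.0, 2.0]]}`)},
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("prediction: %s\n", resp.GetPrediction())
}
```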
