Add the option to enable gRPC health checking in model_server. This is useful for clients that want to use health checking with load-balancing channels (without it, they get errors on the client side). The current implementation is trivial: once we open our serving port, we assume we will always be healthy. Users may want to tweak this, especially if they need a mandated version, etc.

tensorflower-gardener · tensorflow-copybara · commit a9a8e7bfe982 · 2024-02-20T03:40:11.000-08:00
PiperOrigin-RevId: 608545737
diff --git a/tensorflow_serving/model_servers/BUILD b/tensorflow_serving/model_servers/BUILD
@@ -498,6 +498,7 @@ cc_library(
         "//tensorflow_serving/servables/tensorflow:thread_pool_factory_config_cc_proto",
         "//tensorflow_serving/servables/tensorflow:util",
         "//tensorflow_serving/util:proto_util",
+        "@com_github_grpc_grpc//:grpc",
         "@com_github_grpc_grpc//:grpc++",
         "@com_google_absl//absl/memory",
         "@com_google_protobuf//:cc_wkt_protos",
diff --git a/tensorflow_serving/model_servers/main.cc b/tensorflow_serving/model_servers/main.cc
@@ -297,10 +297,12 @@ int main(int argc, char** argv) {
                        "If non-empty, read an ascii ThreadPoolConfig protobuf "
                        "from the supplied file name."),
       tensorflow::Flag("mixed_precision", &options.mixed_precision,
-                      "specify mixed_precision mode"),
+                       "specify mixed_precision mode"),
       tensorflow::Flag("skip_initialize_tpu", &options.skip_initialize_tpu,
-                       "Whether to skip auto initializing TPU.")};
-
+                       "Whether to skip auto initializing TPU."),
+      tensorflow::Flag("enable_grpc_healthcheck_service",
+                       &options.enable_grpc_healthcheck_service,
+                       "Enable the standard gRPC healthcheck service.")};
 
   const auto& usage = tensorflow::Flags::Usage(argv[0], flag_list);
   if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) {
diff --git a/tensorflow_serving/model_servers/server.cc b/tensorflow_serving/model_servers/server.cc
@@ -25,6 +25,7 @@ limitations under the License.
 
 #include "google/protobuf/wrappers.pb.h"
 #include "grpc/grpc.h"
+#include "grpcpp/health_check_service_interface.h"
 #include "grpcpp/resource_quota.h"
 #include "grpcpp/security/server_credentials.h"
 #include "grpcpp/server_builder.h"
@@ -404,8 +405,17 @@ Status Server::BuildAndStart(const Options& server_options) {
   ::grpc::ResourceQuota res_quota;
   res_quota.SetMaxThreads(server_options.grpc_max_threads);
   builder.SetResourceQuota(res_quota);
-
+  ::grpc::EnableDefaultHealthCheckService(
+      server_options.enable_grpc_healthcheck_service);
   grpc_server_ = builder.BuildAndStart();
+
+  if (server_options.enable_grpc_healthcheck_service) {
+    grpc_server_->GetHealthCheckService()->SetServingStatus("ModelService",
+                                                            true);
+    grpc_server_->GetHealthCheckService()->SetServingStatus("PredictionService",
+                                                            true);
+  }
+
   if (grpc_server_ == nullptr) {
     return errors::InvalidArgument("Failed to BuildAndStart gRPC server");
   }
diff --git a/tensorflow_serving/model_servers/server.h b/tensorflow_serving/model_servers/server.h
@@ -103,7 +103,8 @@ class Server {
     bool enable_profiler = true;
     tensorflow::string mixed_precision;
     bool skip_initialize_tpu = false;
-
+    // Misc GRPC features
+    bool enable_grpc_healthcheck_service = false;
     Options();
   };