apache
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
Lines changed: 10 additions & 217 deletions
@@ -17,42 +17,22 @@
 
 package org.apache.solr.handler.admin;
 
-import static org.apache.solr.client.api.model.NodeHealthResponse.NodeStatus.FAILURE;
-import static org.apache.solr.client.api.model.NodeHealthResponse.NodeStatus.OK;
-import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
-import static org.apache.solr.common.SolrException.ErrorCode.SERVICE_UNAVAILABLE;
-import static org.apache.solr.handler.admin.api.ReplicationAPIBase.GENERATION;
-
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
-import java.util.Locale;
-import java.util.stream.Collectors;
-import org.apache.lucene.index.IndexCommit;
 import org.apache.solr.api.Api;
 import org.apache.solr.api.JerseyResource;
-import org.apache.solr.client.api.model.NodeHealthResponse;
 import org.apache.solr.client.solrj.request.HealthCheckRequest;
 import org.apache.solr.cloud.CloudDescriptor;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
-import org.apache.solr.common.cloud.Replica.State;
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.CoreContainer;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.IndexFetcher;
-import org.apache.solr.handler.ReplicationHandler;
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.handler.admin.api.NodeHealthAPI;
 import org.apache.solr.handler.api.V2ApiUtils;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.security.AuthorizationContext;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Health Check Handler for reporting the health of a specific node.
@@ -80,12 +60,13 @@
  * specify the acceptable generation lag follower should be with respect to its leader using the
  * <code>maxGenerationLag=&lt;max_generation_lag&gt;</code> request parameter. If <code>
  * maxGenerationLag</code> is not provided then health check would simply return OK.
+ *
+ * <p>All health-check logic lives in the v2 {@link NodeHealthAPI}; this handler is a thin v1 bridge
+ * that extracts request parameters and delegates.
  */
 public class HealthCheckHandler extends RequestHandlerBase {
 
-  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private static final String PARAM_REQUIRE_HEALTHY_CORES = "requireHealthyCores";
-  private static final List<State> UNHEALTHY_STATES = Arrays.asList(State.DOWN, State.RECOVERING);
 
   CoreContainer coreContainer;
 
@@ -107,207 +88,19 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
     final Integer maxGenerationLag =
         req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
     V2ApiUtils.squashIntoSolrResponseWithoutHeader(
-        rsp, checkNodeHealth(requireHealthyCores, maxGenerationLag));
-  }
-
-  /**
-   * Performs the node health check and returns the result as a {@link NodeHealthResponse}.
-   *
-   * <p>This method is the shared implementation used by both the v1 {@link #handleRequestBody} path
-   * and the v2 JAX-RS {@link NodeHealthAPI}.
-   */
-  public NodeHealthResponse checkNodeHealth(Boolean requireHealthyCores, Integer maxGenerationLag) {
-    if (coreContainer == null || coreContainer.isShutDown()) {
-      throw new SolrException(
-          SERVER_ERROR, "CoreContainer is either not initialized or shutting down");
-    }
-
-    final NodeHealthResponse response = new NodeHealthResponse();
-
-    if (!coreContainer.isZooKeeperAware()) {
-      if (log.isDebugEnabled()) {
-        log.debug("Invoked HealthCheckHandler in legacy mode.");
-      }
-      healthCheckLegacyMode(response, maxGenerationLag);
-    } else {
-      if (log.isDebugEnabled()) {
-        log.debug(
-            "Invoked HealthCheckHandler in cloud mode on [{}]",
-            coreContainer.getZkController().getNodeName());
-      }
-      healthCheckCloudMode(response, requireHealthyCores);
-    }
-
-    return response;
-  }
-
-  private void healthCheckCloudMode(NodeHealthResponse response, Boolean requireHealthyCores) {
-    ZkStateReader zkStateReader = coreContainer.getZkController().getZkStateReader();
-    ClusterState clusterState = zkStateReader.getClusterState();
-
-    if (zkStateReader.getZkClient().isClosed() || !zkStateReader.getZkClient().isConnected()) {
-      throw new SolrException(SERVICE_UNAVAILABLE, "Host Unavailable: Not connected to zk");
-    }
-
-    if (!clusterState.getLiveNodes().contains(coreContainer.getZkController().getNodeName())) {
-      throw new SolrException(SERVICE_UNAVAILABLE, "Host Unavailable: Not in live nodes as per zk");
-    }
-
-    if (Boolean.TRUE.equals(requireHealthyCores)) {
-      if (!coreContainer.isStatusLoadComplete()) {
-        throw new SolrException(SERVICE_UNAVAILABLE, "Host Unavailable: Core Loading not complete");
-      }
-      Collection<CloudDescriptor> coreDescriptors =
-          coreContainer.getCoreDescriptors().stream()
-              .map(cd -> cd.getCloudDescriptor())
-              .collect(Collectors.toList());
-      int unhealthyCores = findUnhealthyCores(coreDescriptors, clusterState);
-      if (unhealthyCores > 0) {
-        response.numCoresUnhealthy = unhealthyCores;
-        throw new SolrException(
-            SERVICE_UNAVAILABLE,
-            unhealthyCores
-                + " out of "
-                + coreContainer.getNumAllCores()
-                + " replicas are currently initializing or recovering");
-      }
-      response.message = "All cores are healthy";
-    }
-
-    response.status = OK;
-  }
-
-  private void healthCheckLegacyMode(NodeHealthResponse response, Integer maxGenerationLag) {
-    List<String> laggingCoresInfo = new ArrayList<>();
-    boolean allCoresAreInSync = true;
-
-    if (maxGenerationLag != null) {
-      if (maxGenerationLag < 0) {
-        log.error("Invalid value for maxGenerationLag:[{}]", maxGenerationLag);
-        response.message =
-            String.format(Locale.ROOT, "Invalid value of maxGenerationLag:%s", maxGenerationLag);
-        response.status = FAILURE;
-        return;
-      }
-
-      for (SolrCore core : coreContainer.getCores()) {
-        ReplicationHandler replicationHandler =
-            (ReplicationHandler) core.getRequestHandler(ReplicationHandler.PATH);
-        if (replicationHandler.isFollower()) {
-          boolean isCoreInSync =
-              isWithinGenerationLag(core, replicationHandler, maxGenerationLag, laggingCoresInfo);
-          allCoresAreInSync &= isCoreInSync;
-        }
-      }
-
-      if (allCoresAreInSync) {
-        response.message =
-            String.format(
-                Locale.ROOT,
-                "All the followers are in sync with leader (within maxGenerationLag: %d) "
-                    + "or the cores are acting as leader",
-                maxGenerationLag);
-        response.status = OK;
-      } else {
-        response.message =
-            String.format(
-                Locale.ROOT,
-                "Cores violating maxGenerationLag:%d.%n%s",
-                maxGenerationLag,
-                String.join(",\n", laggingCoresInfo));
-        response.status = FAILURE;
-      }
-    } else {
-      response.message =
-          "maxGenerationLag isn't specified. Followers aren't "
-              + "checking for the generation lag from the leaders";
-      response.status = OK;
-    }
-  }
-
-  private boolean isWithinGenerationLag(
-      final SolrCore core,
-      ReplicationHandler replicationHandler,
-      int maxGenerationLag,
-      List<String> laggingCoresInfo) {
-    IndexFetcher indexFetcher = null;
-    try {
-      // may not be the best way to get leader's replicableCommit
-      NamedList<?> follower = (NamedList<?>) replicationHandler.getInitArgs().get("follower");
-      indexFetcher = new IndexFetcher(follower, replicationHandler, core);
-      NamedList<?> replicableCommitOnLeader = indexFetcher.getLatestVersion();
-      long leaderGeneration = (Long) replicableCommitOnLeader.get(GENERATION);
-
-      // Get our own commit and generation from the commit
-      IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
-      if (commit != null) {
-        long followerGeneration = commit.getGeneration();
-        long generationDiff = leaderGeneration - followerGeneration;
-
-        // generationDiff shouldn't be negative except for some edge cases, log it. Some scenarios
-        // are
-        // 1) commit generation rolls over Long.MAX_VALUE (really unlikely)
-        // 2) Leader's index is wiped clean and the follower is still showing commit generation
-        // from the old index
-        if (generationDiff < 0) {
-          log.warn("core:[{}], generation lag:[{}] is negative.");
-        } else if (generationDiff < maxGenerationLag) {
-          log.info(
-              "core:[{}] generation lag is above acceptable threshold:[{}], "
-                  + "generation lag:[{}], leader generation:[{}], follower generation:[{}]",
-              core,
-              maxGenerationLag,
-              generationDiff,
-              leaderGeneration,
-              followerGeneration);
-          laggingCoresInfo.add(
-              String.format(
-                  Locale.ROOT,
-                  "Core %s is lagging by %d generations",
-                  core.getName(),
-                  generationDiff));
-          return true;
-        }
-      }
-    } catch (Exception e) {
-      log.error("Failed to check if the follower is in sync with the leader", e);
-    } finally {
-      if (indexFetcher != null) {
-        indexFetcher.destroy();
-      }
-    }
-    return false;
+        rsp,
+        new NodeHealthAPI(coreContainer).checkNodeHealth(requireHealthyCores, maxGenerationLag));
   }
 
   /**
    * Find replicas DOWN or RECOVERING, or replicas in clusterstate that do not exist on local node.
-   * We first find local cores which are either not registered or unhealthy, and check each of these
-   * against the clusterstate, and return a count of unhealthy replicas
    *
-   * @param cores list of core cloud descriptors to iterate
-   * @param clusterState clusterstate from ZK
-   * @return number of unhealthy cores, either in DOWN or RECOVERING state
+   * @deprecated Use {@link NodeHealthAPI#findUnhealthyCores(Collection, ClusterState)} instead.
    */
+  @Deprecated
   public static int findUnhealthyCores(
       Collection<CloudDescriptor> cores, ClusterState clusterState) {
-    return Math.toIntExact(
-        cores.stream()
-            .filter(
-                c ->
-                    !c.hasRegistered()
-                        || UNHEALTHY_STATES.contains(
-                            c.getLastPublished())) // Find candidates locally
-            .filter(
-                c ->
-                    clusterState.hasCollection(
-                        c.getCollectionName())) // Only care about cores for actual collections
-            .filter(
-                c ->
-                    clusterState
-                        .getCollection(c.getCollectionName())
-                        .getActiveSlicesMap()
-                        .containsKey(c.getShardId()))
-            .count());
+    return NodeHealthAPI.findUnhealthyCores(cores, clusterState);
   }
 
   @Override
@@ -327,7 +120,7 @@ public Boolean registerV2() {
 
   @Override
   public Collection<Api> getApis() {
-    return List.of();
+    return Collections.emptyList();
   }
 
   @Override