DOC-5665 failover concepts

andy-stark-redis · andy-stark-redis · commit effa51a38070 · 2025-08-29T14:01:01.000+01:00
diff --git a/content/develop/clients/jedis/failover.md b/content/develop/clients/jedis/failover.md
@@ -0,0 +1,85 @@
+---
+categories:
+- docs
+- develop
+- stack
+- oss
+- rs
+- rc
+- oss
+- kubernetes
+- clients
+description: Improve reliability using the failover/failback features of Jedis.
+linkTitle: Failover/failback
+title: Failover and failback
+weight: 50
+---
+
+Jedis supports [failover and failback](https://en.wikipedia.org/wiki/Failover)
+to improve the availability of connections to Redis databases. This page explains
+the concepts and describes how to configure Jedis for failover and failback.
+
+## Concepts
+
+You may have [Active-Active databases]({{< relref "/operate/rs/databases/active-active" >}})
+or independent Redis servers that are all suitable to serve your app.
+Typically, you would prefer some database endpoints over others for a particular
+instance of your app (perhaps the ones that are closest geographically to the app server
+to reduce network latency). However, if the best endpoint is not available due
+to a failure, it is generally better to switch to another, suboptimal endpoint
+than to let the app fail completely.
+
+*Failover* is the technique of actively checking for connection failures and
+automatically switching to another endpoint when a failure is detected.
+
+{{< image filename="images/failover/failover-client-reconnect.svg" alt="Failover and client reconnection" >}}
+
+The complementary technique of *failback* then involves checking the original
+endpoint periodically to see if it has recovered, and switching back to it
+when it is available again.
+
+{{< image filename="images/failover/failover-client-failback.svg" alt="Failback: client switches back to original server" width="75%" >}}
+
+### Detecting a failed connection
+
+Jedis uses the [resilience4j](https://resilience4j.readme.io/docs/getting-started) library
+to detect connection failures using a
+[circuit breaker design pattern](https://en.wikipedia.org/wiki/Circuit_breaker_design_pattern).
+
+The circuit breaker is a software component that tracks recent connection
+attempts in sequence, recording which ones have succeeded and which have failed.
+(Note that many connection failures are transient, so before recording a failure,
+the first response should usually be just to retry the connection a few times.)
+
+The status of the connection attempts is kept in a "sliding window", which
+is simply a buffer where the least recent item is dropped as each new
+one is added.
+
+{{< image filename="images/failover/failover-sliding-window.svg" alt="Sliding window of recent connection attempts" >}}
+
+When the number of failures in the window exceeds a configured
+threshold, the circuit breaker declares the server to be unhealthy and triggers
+a failover.
+
+### Selecting a failover target
+
+Since you may have multiple Redis servers available to fail over to, Jedis
+lets you configure a list of endpoints to try, ordered by priority or
+"weight". When a failover is triggered, Jedis selects the highest-weighted
+endpoint that is still healthy and uses it for the temporary connection.
+
+### Health checks
+
+Given that the original endpoint had some geographical or other advantage
+over the failover target, you will generally want to fail back to it as soon
+as it recovers. To detect when this happens, Jedis periodically
+runs a "health check" on the server. This can be as simple as
+sending a Redis [`ECHO`]({{< relref "/commands/echo" >}}) command and checking
+that it gives a response.
+
+You can also configure Jedis to run health checks on the current target
+server during periods of inactivity. This can help to detect when the
+server has failed and a failover is needed even when your app is not actively
+using it.
+
+
diff --git a/static/images/failover/failover-client-failback.svg b/static/images/failover/failover-client-failback.svg
@@ -0,0 +1,89 @@
+<svg version="1.1" xmlns="http://www.w3.org/2000/svg" width="640" height="300" viewBox="0 0 640 300">
+  <style>
+    .title { font: 700 16px 'Space Grotesk', Helvetica, Arial, sans-serif; fill: #000; }
+    .label { font: 12px 'Space Grotesk', Helvetica, Arial, sans-serif; fill: #000; }
+    .panel { fill: #ffffff; stroke: #000000; rx: 10; }
+    .map { fill: rgb(240,240,240); stroke: #cccccc; }
+    .server-body { fill: #ffffff; stroke: #000000; rx: 4; }
+    .server-top { fill: rgb(200,0,0); }
+    .client { fill: #ffffff; stroke: #000000; rx: 6; }
+    .connOk { stroke: #FF0000; stroke-width: 3; fill: none; }
+    .connPast { stroke: #999999; stroke-width: 2; fill: none; stroke-dasharray: 6 6; opacity: 0.6; }
+    .checkmark { stroke: #2e7d32; stroke-width: 3; stroke-linecap: round; fill: none; }
+  </style>
+
+  <defs>
+    <!-- Arrow marker matching CSC diagrams' red, width 3 -->
+    <marker id="arrowRed" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="8" markerHeight="8" orient="auto-start-reverse">
+      <path d="M 0 0 L 10 5 L 0 10 z" fill="#FF0000" />
+    </marker>
+
+    <!-- Simple server icon -->
+    <g id="serverIcon">
+      <rect class="server-body" x="0" y="4" width="36" height="24" />
+      <rect class="server-top" x="0" y="0" width="36" height="6" />
+      <circle cx="30" cy="17" r="2" fill="#999" />
+    </g>
+
+    <!-- Simple client box -->
+    <g id="clientIcon">
+      <rect class="client" x="0" y="0" width="70" height="36" />
+      <text x="35" y="23" text-anchor="middle" class="label">Client</text>
+    </g>
+
+    <!-- Abstract map shape (same as reconnect diagram) -->
+    <g id="mapShape">
+      <path class="map" d="M20,70 C60,20 140,20 180,70 C220,120 140,160 100,150 C60,140 -20,120 20,70 z"/>
+      <!-- small island cluster -->
+      <path class="map" d="M170,110 c15,-10 35,-8 44,2 c9,10 -3,25 -20,28 c-17,3 -41,-9 -24,-30 z"/>
+      <path class="map" d="M48,124 c10,-6 22,-6 28,0 c6,6 3,14 -6,16 c-9,2 -24,-3 -22,-16 z"/>
+    </g>
+  </defs>
+  <!-- Panel 4: Original available -->
+  <g transform="translate(10,10)">
+    <rect class="panel" x="0" y="0" width="300" height="280" />
+    <text class="title" x="12" y="26">4. Original available</text>
+    <g transform="translate(20,40)">
+      <use href="#mapShape"/>
+      <!-- Same servers -->
+      <use href="#serverIcon" x="50" y="90"/>
+      <use href="#serverIcon" x="150" y="40"/>
+      <use href="#serverIcon" x="180" y="130"/>
+      <!-- Client -->
+      <use href="#clientIcon" x="0" y="170"/>
+      <!-- Solid connection to temporary server (still in place) -->
+      <path class="connOk" marker-end="url(#arrowRed)" d="M 70,188 C 120,170 160,160 188,146"/>
+      <!-- Green checkmark indicating original server healthy -->
+      <g transform="translate(65,110)">
+        <path class="checkmark" d="M -6 0 L -1 5 L 8 -6"/>
+      </g>
+      <text class="label" x="10" y="230">Original server healthy (detected)</text>
+    </g>
+  </g>
+
+
+  <!-- Panel 5: Failback -->
+  <g transform="translate(330,10)">
+    <rect class="panel" x="0" y="0" width="300" height="280" />
+    <text class="title" x="12" y="26">5. Failback</text>
+    <g transform="translate(20,40)">
+      <use href="#mapShape"/>
+      <!-- Servers on the map (original server at 50,90 available again) -->
+      <use href="#serverIcon" x="50" y="90"/>
+      <use href="#serverIcon" x="150" y="40"/>
+      <use href="#serverIcon" x="180" y="130"/>
+      <!-- Client -->
+      <use href="#clientIcon" x="0" y="170"/>
+      <!-- Faint dashed past connection to temporary server at (180,130) -->
+      <path class="connPast" d="M 70,188 C 120,170 160,160 188,146"/>
+      <!-- Solid connection back to original server at (50,90) -->
+      <path class="connOk" marker-end="url(#arrowRed)" d="M 70,188 C 90,170 80,140 68,114"/>
+      <!-- Green checkmark indicating original server healthy -->
+      <g transform="translate(65,110)">
+        <path class="checkmark" d="M -6 0 L -1 5 L 8 -6"/>
+      </g>
+      <text class="label" x="10" y="230">Connection switches back to original server</text>
+    </g>
+  </g>
+</svg>
+
diff --git a/static/images/failover/failover-client-reconnect.svg b/static/images/failover/failover-client-reconnect.svg
@@ -0,0 +1,107 @@
+<svg version="1.1" xmlns="http://www.w3.org/2000/svg" width="960" height="300" viewBox="0 0 960 300">
+  <style>
+    .title { font: 700 16px 'Space Grotesk', Helvetica, Arial, sans-serif; fill: #000; }
+    .label { font: 12px 'Space Grotesk', Helvetica, Arial, sans-serif; fill: #000; }
+    .panel { fill: #ffffff; stroke: #000000; rx: 10; }
+    .map { fill: rgb(240,240,240); stroke: #cccccc; }
+    .server-body { fill: #ffffff; stroke: #000000; rx: 4; }
+    .server-top { fill: rgb(200,0,0); }
+    .client { fill: #ffffff; stroke: #000000; rx: 6; }
+    .connOk { stroke: #FF0000; stroke-width: 3; fill: none; }
+    .connBroken { stroke: #FF0000; stroke-width: 3; fill: none; stroke-dasharray: 8 6; }
+    .xmark { stroke: #FF0000; stroke-width: 3; stroke-linecap: round; }
+  </style>
+
+  <defs>
+    <!-- Arrow marker matching CSC diagrams' red, width 3 -->
+    <marker id="arrowRed" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="8" markerHeight="8" orient="auto-start-reverse">
+      <path d="M 0 0 L 10 5 L 0 10 z" fill="#FF0000" />
+    </marker>
+
+    <!-- Simple server icon -->
+    <g id="serverIcon">
+      <rect class="server-body" x="0" y="4" width="36" height="24" />
+      <rect class="server-top" x="0" y="0" width="36" height="6" />
+      <circle cx="30" cy="17" r="2" fill="#999" />
+    </g>
+
+    <!-- Simple client box -->
+    <g id="clientIcon">
+      <rect class="client" x="0" y="0" width="70" height="36" />
+      <text x="35" y="23" text-anchor="middle" class="label">Client</text>
+    </g>
+
+    <!-- Abstract map shape -->
+    <g id="mapShape">
+      <path class="map" d="M20,70 C60,20 140,20 180,70 C220,120 140,160 100,150 C60,140 -20,120 20,70 z"/>
+      <!-- small island cluster -->
+      <path class="map" d="M170,110 c15,-10 35,-8 44,2 c9,10 -3,25 -20,28 c-17,3 -41,-9 -24,-30 z"/>
+      <path class="map" d="M48,124 c10,-6 22,-6 28,0 c6,6 3,14 -6,16 c-9,2 -24,-3 -22,-16 z"/>
+    </g>
+  </defs>
+
+  <!-- Panel 1: Connected -->
+  <g transform="translate(10,10)">
+    <rect class="panel" x="0" y="0" width="300" height="280" />
+    <text class="title" x="12" y="26">1. Connected</text>
+    <g transform="translate(20,40)">
+      <use href="#mapShape"/>
+      <!-- Servers on the map -->
+      <use href="#serverIcon" x="50" y="90"/>
+      <use href="#serverIcon" x="150" y="40"/>
+      <use href="#serverIcon" x="180" y="130"/>
+      <!-- Client -->
+      <use href="#clientIcon" x="0" y="170"/>
+      <!-- Solid connection to server at (50,90) -->
+      <path class="connOk" marker-end="url(#arrowRed)" d="M 70,188 C 90,170 80,140 68,114"/>
+      <text class="label" x="10" y="230">Active connection</text>
+    </g>
+  </g>
+
+  <!-- Panel 2: Connection lost -->
+  <g transform="translate(330,10)">
+    <rect class="panel" x="0" y="0" width="300" height="280" />
+    <text class="title" x="12" y="26">2. Connection lost</text>
+    <g transform="translate(20,40)">
+      <use href="#mapShape"/>
+      <!-- Same servers -->
+      <use href="#serverIcon" x="50" y="90"/>
+      <use href="#serverIcon" x="150" y="40"/>
+      <use href="#serverIcon" x="180" y="130"/>
+      <!-- Client -->
+      <use href="#clientIcon" x="0" y="170"/>
+      <!-- Broken connection shown dashed + X -->
+      <path class="connBroken" marker-end="url(#arrowRed)" d="M 70,188 C 90,170 80,140 68,114"/>
+      <g transform="translate(65,110)">
+        <line class="xmark" x1="-6" y1="-6" x2="6" y2="6"/>
+        <line class="xmark" x1="6" y1="-6" x2="-6" y2="6"/>
+      </g>
+      <text class="label" x="10" y="230">Network issue or node down</text>
+    </g>
+  </g>
+
+  <!-- Panel 3: Automatic failover -->
+  <g transform="translate(650,10)">
+    <rect class="panel" x="0" y="0" width="300" height="280" />
+    <text class="title" x="12" y="26">3. Automatic failover</text>
+    <g transform="translate(20,40)">
+      <use href="#mapShape"/>
+      <!-- Same servers -->
+      <use href="#serverIcon" x="50" y="90"/>
+      <use href="#serverIcon" x="150" y="40"/>
+      <use href="#serverIcon" x="180" y="130"/>
+      <!-- Client -->
+      <use href="#clientIcon" x="0" y="170"/>
+      <!-- New solid connection to a different server (e.g., at 180,130) -->
+      <path class="connOk" marker-end="url(#arrowRed)" d="M 70,188 C 120,170 160,160 188,146"/>
+      <!-- Mark failed original server -->
+      <g transform="translate(65,110)">
+        <line class="xmark" x1="-6" y1="-6" x2="6" y2="6"/>
+        <line class="xmark" x1="6" y1="-6" x2="-6" y2="6"/>
+      </g>
+
+      <text class="label" x="10" y="230">Client reconnects to healthy server</text>
+    </g>
+  </g>
+</svg>
+
diff --git a/static/images/failover/failover-sliding-window.svg b/static/images/failover/failover-sliding-window.svg