Skip to content

Commit 88f759e

Browse files
Add check-proxy-health flag and implement Envoy health check on local… (#799)
* Add check-proxy-health flag and implement an Envoy health check against the locally running Envoy process. Disable the graceful startup flow when graceful startup seconds is greater than 0. * Refactor the Envoy health check logic into a `doHealthCheck` function and add unit tests for improved coverage and modularity. * Fix lint issue and add changelog entry. * Refactor the Envoy health check to improve readability and maintainability by using `http.Status` constants, adjusting test cases, and streamlining function calls.
1 parent 815740c commit 88f759e

File tree

6 files changed

+181
-3
lines changed

6 files changed

+181
-3
lines changed

.changelog/799.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:feature
2+
Implemented a "check-proxy-health" subcommand that checks whether the locally running Envoy proxy is ready by calling the /ready HTTP endpoint on the Envoy admin URL. It is intended for kubelet startup and liveness probes when consul-dataplane is registered as a sidecar container.
3+
```

cmd/consul-dataplane/config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ import (
1414
)
1515

1616
type FlagOpts struct {
17-
dataplaneConfig DataplaneConfigFlags
17+
dataplaneConfig DataplaneConfigFlags
18+
checkProxyHealth bool
1819

1920
printVersion bool
2021
configFile string

cmd/consul-dataplane/main.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ import (
88
"flag"
99
"fmt"
1010
"log"
11+
"net/http"
1112
"os"
1213
"os/signal"
1314
"strings"
1415
"syscall"
16+
"time"
1517

1618
"github.com/hashicorp/consul-dataplane/pkg/consuldp"
1719
"github.com/hashicorp/consul-dataplane/pkg/version"
@@ -27,6 +29,8 @@ func init() {
2729
flagOpts = &FlagOpts{}
2830
flags.BoolVar(&flagOpts.printVersion, "version", false, "Prints the current version of consul-dataplane.")
2931

32+
flags.BoolVar(&flagOpts.checkProxyHealth, "check-proxy-health", false, "checks envoy proxy health and exits with 0 if healthy, 1 otherwise.")
33+
3034
StringVar(flags, &flagOpts.dataplaneConfig.Mode, "mode", "DP_MODE", "dataplane mode. Value can be:\n"+
3135
"1. sidecar - used when running as a sidecar to Consul services with xDS Server, Envoy, and DNS Server running; OR\n"+
3236
"2. dns-proxy - used when running as a standalone application where DNS Server runs, but Envoy and xDS Server are enabled.\n")
@@ -157,6 +161,17 @@ func run() error {
157161
return nil
158162
}
159163

164+
consulDPDefaultFlags, err := buildDefaultConsulDPFlags()
165+
if err != nil {
166+
return err
167+
}
168+
169+
if flagOpts.checkProxyHealth {
170+
fmt.Printf("Checking envoy proxy health\n")
171+
runProxyReadyCmd(consulDPDefaultFlags)
172+
return nil
173+
}
174+
160175
readServiceIDFromFile()
161176
readProxyIDFromFile()
162177
validateFlags()
@@ -231,3 +246,52 @@ func readProxyIDFromFile() {
231246
flagOpts.dataplaneConfig.Proxy.ID = &s
232247
}
233248
}
249+
250+
func runProxyReadyCmd(config DataplaneConfigFlags) {
251+
// Define the Envoy admin endpoint URL. This is typically internal and
252+
// only accessible from within the Pod on the loopback interface.
253+
254+
adminPort := *config.Envoy.AdminBindPort
255+
256+
if flagOpts.dataplaneConfig.Envoy.AdminBindPort != nil {
257+
adminPort = *flagOpts.dataplaneConfig.Envoy.AdminBindPort
258+
}
259+
260+
doHealthCheck(adminPort, http.DefaultClient, os.Exit)
261+
}
262+
263+
// doHealthCheck probes the Envoy admin endpoint at
// http://127.0.0.1:<adminPort>/ready and reports the outcome through exitFunc:
// 0 when the response status is in the range 200-399, 1 for any other status
// or when the request cannot be created or sent.
//
// A nil client falls back to http.DefaultClient and a nil exitFunc falls back
// to os.Exit. exitFunc is called exactly once, after the probe has returned,
// so the request context is cancelled and the response body is closed before
// a process-terminating exitFunc runs.
func doHealthCheck(adminPort int, client *http.Client, exitFunc func(int)) {
	if client == nil {
		client = http.DefaultClient
	}
	if exitFunc == nil {
		exitFunc = os.Exit
	}

	exitFunc(probeEnvoyReady(adminPort, client))
}

// probeEnvoyReady issues the GET /ready request and returns the exit code
// (0 ready, 1 not ready or unreachable), printing the result as it goes.
func probeEnvoyReady(adminPort int, client *http.Client) int {
	envoyAdminURL := fmt.Sprintf("http://127.0.0.1:%d/ready", adminPort)

	// Bound the request so a hung admin endpoint cannot stall the probe.
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, envoyAdminURL, nil)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating request: %v\n", err)
		return 1
	}

	resp, err := client.Do(req)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error connecting to Envoy admin endpoint: %v\n", err)
		return 1
	}
	defer resp.Body.Close()

	// Only the status code matters for the probe: 200-399 counts as success.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest {
		fmt.Fprintf(os.Stderr, "Envoy proxy is not ready. Received status code: %d\n", resp.StatusCode)
		return 1
	}

	fmt.Println("Envoy proxy is ready.")
	return 0
}

cmd/consul-dataplane/main_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"github.com/stretchr/testify/require"
6+
"io"
7+
"net/http"
8+
"net/http/httptest"
9+
"net/url"
10+
"os"
11+
"strconv"
12+
"testing"
13+
)
14+
15+
func TestDoHealthCheck(t *testing.T) {
16+
tests := []struct {
17+
name string
18+
statusCode int
19+
serverErr bool
20+
expectedExit int
21+
expectedOutput string
22+
}{
23+
{
24+
name: "success with 200",
25+
statusCode: http.StatusOK,
26+
expectedExit: 0,
27+
expectedOutput: "Envoy proxy is ready.\n",
28+
},
29+
{
30+
name: "success with 204",
31+
statusCode: http.StatusNoContent,
32+
expectedExit: 0,
33+
expectedOutput: "Envoy proxy is ready.\n",
34+
},
35+
{
36+
name: "failure with 404",
37+
statusCode: http.StatusNotFound,
38+
expectedExit: 1,
39+
expectedOutput: "Envoy proxy is not ready. Received status code: 404\n",
40+
},
41+
{
42+
name: "failure with 500",
43+
statusCode: http.StatusInternalServerError,
44+
expectedExit: 1,
45+
expectedOutput: "Envoy proxy is not ready. Received status code: 500\n",
46+
},
47+
{
48+
name: "server error",
49+
serverErr: true,
50+
expectedExit: 1,
51+
expectedOutput: "Error connecting to Envoy admin endpoint: ",
52+
},
53+
}
54+
55+
for _, tc := range tests {
56+
t.Run(tc.name, func(t *testing.T) {
57+
var exitCode int
58+
mockExit := func(code int) {
59+
exitCode = code
60+
}
61+
62+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
63+
require.Equal(t, "/ready", r.URL.Path)
64+
require.Equal(t, "GET", r.Method)
65+
66+
if tc.serverErr {
67+
panic("simulated server error")
68+
}
69+
70+
w.WriteHeader(tc.statusCode)
71+
}))
72+
defer ts.Close()
73+
74+
client := ts.Client()
75+
u, _ := url.Parse(ts.URL)
76+
port, _ := strconv.Atoi(u.Port())
77+
78+
// Capture stdout/stderr
79+
stdout := captureOutput(t, func() {
80+
doHealthCheck(port, client, mockExit)
81+
})
82+
83+
require.Contains(t, stdout, tc.expectedOutput)
84+
require.Equal(t, tc.expectedExit, exitCode)
85+
86+
})
87+
}
88+
}
89+
90+
// captureOutput runs f with os.Stdout and os.Stderr redirected to a pipe and
// returns everything written to either stream while f was running.
//
// The original streams are restored even if f panics or ends the test, and
// the pipe is drained concurrently so f cannot block on a full pipe buffer.
func captureOutput(t *testing.T, f func()) string {
	t.Helper()

	r, w, err := os.Pipe()
	if err != nil {
		t.Fatalf("failed to create pipe: %v", err)
	}
	defer r.Close()

	type captured struct {
		out string
		err error
	}
	done := make(chan captured, 1)
	go func() {
		var buf bytes.Buffer
		_, copyErr := io.Copy(&buf, r)
		done <- captured{out: buf.String(), err: copyErr}
	}()

	oldStdout, oldStderr := os.Stdout, os.Stderr
	os.Stdout, os.Stderr = w, w
	func() {
		defer func() {
			os.Stdout, os.Stderr = oldStdout, oldStderr
			// Closing the write end signals EOF to the draining goroutine.
			w.Close()
		}()
		f()
	}()

	res := <-done
	if res.err != nil {
		t.Fatalf("failed to copy output: %v", res.err)
	}
	return res.out
}

pkg/consuldp/consul_dataplane.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,6 @@ func (cdp *ConsulDataplane) Run(ctx context.Context) error {
249249
return err
250250
}
251251

252-
cdp.lifecycleConfig.gracefulStartup()
253-
254252
go func() {
255253
select {
256254
case <-ctx.Done():

pkg/envoy/proxy.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,10 @@ func (p *Proxy) Ready() (bool, error) {
396396
p.cfg.Logger.Error("envoy: admin endpoint not available", "error", err)
397397
return false, err
398398
}
399+
if rsp != nil {
400+
defer rsp.Body.Close()
401+
}
402+
399403
return rsp.StatusCode == 200, nil
400404
default:
401405
return false, nil

0 commit comments

Comments
 (0)