Skip to content

Commit 94db245

Browse files
committed
Add path http health check ability on before proxy start
1 parent 26f6847 commit 94db245

File tree

6 files changed

+215
-35
lines changed

6 files changed

+215
-35
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ environment variables that you can set.
8585
| `BAD_GATEWAY_PAGE` | Path to an HTML file to serve when the backend server returns a 502 Bad Gateway error. If there is no file at the specific path, Thruster will serve an empty 502 response instead. Because Thruster boots very quickly, a custom page can be a useful way to show that your application is starting up. | `./public/502.html` |
8686
| `HTTP_PORT` | The port to listen on for HTTP traffic. | 80 |
8787
| `HTTPS_PORT` | The port to listen on for HTTPS traffic. | 443 |
88+
| `HTTP_HEALTH_PATH` | The http health path to check before start port listening. | None |
89+
| `HTTP_HEALTH_HOST` | The http health host to check before start port listening. | 127.0.0.1 |
90+
| `HTTP_HEALTH_INTERVAL` | The http health path check interval (seconds). | 1 |
91+
| `HTTP_HEALTH_TIMEOUT` | The http health path check timeout (seconds). | 1 |
92+
| `HTTP_HEALTH_DEADLINE` | The http health path deadline interval (seconds), after which thruster will exit with error, if no success response. | 120 |
8893
| `HTTP_IDLE_TIMEOUT` | The maximum time in seconds that a client can be idle before the connection is closed. | 60 |
8994
| `HTTP_READ_TIMEOUT` | The maximum time in seconds that a client can take to send the request headers and body. | 30 |
9095
| `HTTP_WRITE_TIMEOUT` | The maximum time in seconds during which the client must read the response. | 30 |

go.mod

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ module github.com/basecamp/thruster
33
go 1.25.3
44

55
require (
6-
github.com/klauspost/compress v1.17.4
6+
github.com/klauspost/compress v1.18.1
77
github.com/stretchr/testify v1.8.4
8-
golang.org/x/crypto v0.37.0
8+
golang.org/x/crypto v0.43.0
9+
golang.org/x/net v0.46.0
910
)
1011

1112
require (
1213
github.com/davecgh/go-spew v1.1.1 // indirect
1314
github.com/kr/text v0.2.0 // indirect
1415
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
1516
github.com/pmezard/go-difflib v1.0.0 // indirect
16-
golang.org/x/net v0.39.0 // indirect
17-
golang.org/x/text v0.24.0 // indirect
17+
golang.org/x/text v0.30.0 // indirect
1818
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
1919
gopkg.in/yaml.v3 v3.0.1 // indirect
2020
)

go.sum

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
22
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
33
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4-
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
5-
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
4+
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
5+
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
66
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
77
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
88
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -13,12 +13,12 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
1313
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
1414
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
1515
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
16-
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
17-
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
18-
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
19-
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
20-
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
21-
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
16+
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
17+
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
18+
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
19+
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
20+
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
21+
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
2222
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
2323
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
2424
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

internal/config.go

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,14 @@ const (
2727
defaultStoragePath = "./storage/thruster"
2828
defaultBadGatewayPage = "./public/502.html"
2929

30-
defaultHttpPort = 80
31-
defaultHttpsPort = 443
32-
defaultHttpIdleTimeout = 60 * time.Second
33-
defaultHttpReadTimeout = 30 * time.Second
34-
defaultHttpWriteTimeout = 30 * time.Second
30+
defaultHttpPort = 80
31+
defaultHttpsPort = 443
32+
defaultHttpHealthTimeout = 1 * time.Second
33+
defaultHttpHealthInterval = 1 * time.Second
34+
defaultHttpHealthDeadline = 2 * time.Minute
35+
defaultHttpIdleTimeout = 60 * time.Second
36+
defaultHttpReadTimeout = 30 * time.Second
37+
defaultHttpWriteTimeout = 30 * time.Second
3538

3639
defaultH2CEnabled = false
3740

@@ -57,11 +60,16 @@ type Config struct {
5760
StoragePath string
5861
BadGatewayPage string
5962

60-
HttpPort int
61-
HttpsPort int
62-
HttpIdleTimeout time.Duration
63-
HttpReadTimeout time.Duration
64-
HttpWriteTimeout time.Duration
63+
HttpPort int
64+
HttpsPort int
65+
HttpHealthHost string
66+
HttpHealthPath string
67+
HttpHealthTimeout time.Duration
68+
HttpHealthInterval time.Duration
69+
HttpHealthDeadline time.Duration
70+
HttpIdleTimeout time.Duration
71+
HttpReadTimeout time.Duration
72+
HttpWriteTimeout time.Duration
6573

6674
H2CEnabled bool
6775

@@ -84,7 +92,7 @@ func NewConfig() (*Config, error) {
8492
config := &Config{
8593
TargetPort: getEnvInt("TARGET_PORT", defaultTargetPort),
8694
UpstreamCommand: os.Args[1],
87-
UpstreamArgs: os.Args[2:],
95+
UpstreamArgs: append([]string{}, os.Args[2:]...),
8896

8997
CacheSizeBytes: getEnvInt("CACHE_SIZE", defaultCacheSize),
9098
MaxCacheItemSizeBytes: getEnvInt("MAX_CACHE_ITEM_SIZE", defaultMaxCacheItemSizeBytes),
@@ -99,11 +107,16 @@ func NewConfig() (*Config, error) {
99107
StoragePath: getEnvString("STORAGE_PATH", defaultStoragePath),
100108
BadGatewayPage: getEnvString("BAD_GATEWAY_PAGE", defaultBadGatewayPage),
101109

102-
HttpPort: getEnvInt("HTTP_PORT", defaultHttpPort),
103-
HttpsPort: getEnvInt("HTTPS_PORT", defaultHttpsPort),
104-
HttpIdleTimeout: getEnvDuration("HTTP_IDLE_TIMEOUT", defaultHttpIdleTimeout),
105-
HttpReadTimeout: getEnvDuration("HTTP_READ_TIMEOUT", defaultHttpReadTimeout),
106-
HttpWriteTimeout: getEnvDuration("HTTP_WRITE_TIMEOUT", defaultHttpWriteTimeout),
110+
HttpPort: getEnvInt("HTTP_PORT", defaultHttpPort),
111+
HttpsPort: getEnvInt("HTTPS_PORT", defaultHttpsPort),
112+
HttpHealthHost: getEnvString("HTTP_HEALTH_HOST", "127.0.0.1"),
113+
HttpHealthPath: getEnvString("HTTP_HEALTH_PATH", ""),
114+
HttpHealthInterval: getEnvDuration("HTTP_HEALTH_INTERVAL", defaultHttpHealthInterval),
115+
HttpHealthTimeout: getEnvDuration("HTTP_HEALTH_TIMEOUT", defaultHttpHealthTimeout),
116+
HttpHealthDeadline: getEnvDuration("HTTP_HEALTH_DEADLINE", defaultHttpHealthDeadline),
117+
HttpIdleTimeout: getEnvDuration("HTTP_IDLE_TIMEOUT", defaultHttpIdleTimeout),
118+
HttpReadTimeout: getEnvDuration("HTTP_READ_TIMEOUT", defaultHttpReadTimeout),
119+
HttpWriteTimeout: getEnvDuration("HTTP_WRITE_TIMEOUT", defaultHttpWriteTimeout),
107120

108121
H2CEnabled: getEnvBool("H2C_ENABLED", defaultH2CEnabled),
109122

internal/config_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ func TestConfig_defaults(t *testing.T) {
105105
assert.Equal(t, "echo", c.UpstreamCommand)
106106
assert.Equal(t, defaultCacheSize, c.CacheSizeBytes)
107107
assert.Equal(t, slog.LevelInfo, c.LogLevel)
108+
assert.Equal(t, "", c.HttpHealthPath)
109+
assert.Equal(t, "127.0.0.1", c.HttpHealthHost)
110+
assert.Equal(t, 1*time.Second, c.HttpHealthTimeout)
111+
assert.Equal(t, 1*time.Second, c.HttpHealthInterval)
112+
assert.Equal(t, 2*time.Minute, c.HttpHealthDeadline)
108113
assert.Equal(t, false, c.H2CEnabled)
109114
}
110115

@@ -118,6 +123,11 @@ func TestConfig_override_defaults_with_env_vars(t *testing.T) {
118123
usingEnvVar(t, "DEBUG", "1")
119124
usingEnvVar(t, "ACME_DIRECTORY", "https://acme-staging-v02.api.letsencrypt.org/directory")
120125
usingEnvVar(t, "LOG_REQUESTS", "false")
126+
usingEnvVar(t, "HTTP_HEALTH_PATH", "/health")
127+
usingEnvVar(t, "HTTP_HEALTH_HOST", "localhost")
128+
usingEnvVar(t, "HTTP_HEALTH_INTERVAL", "3")
129+
usingEnvVar(t, "HTTP_HEALTH_TIMEOUT", "4")
130+
usingEnvVar(t, "HTTP_HEALTH_DEADLINE", "60")
121131
usingEnvVar(t, "H2C_ENABLED", "true")
122132

123133
c, err := NewConfig()
@@ -130,6 +140,11 @@ func TestConfig_override_defaults_with_env_vars(t *testing.T) {
130140
assert.Equal(t, false, c.GzipCompressionEnabled)
131141
assert.Equal(t, slog.LevelDebug, c.LogLevel)
132142
assert.Equal(t, "https://acme-staging-v02.api.letsencrypt.org/directory", c.ACMEDirectoryURL)
143+
assert.Equal(t, "/health", c.HttpHealthPath)
144+
assert.Equal(t, "localhost", c.HttpHealthHost)
145+
assert.Equal(t, 3*time.Second, c.HttpHealthInterval)
146+
assert.Equal(t, 4*time.Second, c.HttpHealthTimeout)
147+
assert.Equal(t, 60*time.Second, c.HttpHealthDeadline)
133148
assert.Equal(t, false, c.LogRequests)
134149
assert.Equal(t, true, c.H2CEnabled)
135150
}
@@ -142,6 +157,11 @@ func TestConfig_override_defaults_with_env_vars_using_prefix(t *testing.T) {
142157
usingEnvVar(t, "THRUSTER_X_SENDFILE_ENABLED", "0")
143158
usingEnvVar(t, "THRUSTER_DEBUG", "1")
144159
usingEnvVar(t, "THRUSTER_LOG_REQUESTS", "0")
160+
usingEnvVar(t, "THRUSTER_HTTP_HEALTH_PATH", "/health")
161+
usingEnvVar(t, "THRUSTER_HTTP_HEALTH_HOST", "localhost")
162+
usingEnvVar(t, "THRUSTER_HTTP_HEALTH_INTERVAL", "3")
163+
usingEnvVar(t, "THRUSTER_HTTP_HEALTH_TIMEOUT", "4")
164+
usingEnvVar(t, "THRUSTER_HTTP_HEALTH_DEADLINE", "60")
145165
usingEnvVar(t, "THRUSTER_H2C_ENABLED", "1")
146166

147167
c, err := NewConfig()
@@ -153,6 +173,11 @@ func TestConfig_override_defaults_with_env_vars_using_prefix(t *testing.T) {
153173
assert.Equal(t, false, c.XSendfileEnabled)
154174
assert.Equal(t, slog.LevelDebug, c.LogLevel)
155175
assert.Equal(t, false, c.LogRequests)
176+
assert.Equal(t, "/health", c.HttpHealthPath)
177+
assert.Equal(t, "localhost", c.HttpHealthHost)
178+
assert.Equal(t, 3*time.Second, c.HttpHealthInterval)
179+
assert.Equal(t, 4*time.Second, c.HttpHealthTimeout)
180+
assert.Equal(t, 60*time.Second, c.HttpHealthDeadline)
156181
assert.Equal(t, true, c.H2CEnabled)
157182
}
158183

@@ -167,6 +192,20 @@ func TestConfig_prefixed_variables_take_precedence_over_non_prefixed(t *testing.
167192
assert.Equal(t, 4000, c.TargetPort)
168193
}
169194

195+
func TestConfig_defaults_are_used_if_strconv_fails(t *testing.T) {
196+
usingProgramArgs(t, "thruster", "echo", "hello")
197+
usingEnvVar(t, "TARGET_PORT", "should-be-an-int")
198+
usingEnvVar(t, "HTTP_IDLE_TIMEOUT", "should-be-a-duration")
199+
usingEnvVar(t, "X_SENDFILE_ENABLED", "should-be-a-bool")
200+
201+
c, err := NewConfig()
202+
require.NoError(t, err)
203+
204+
assert.Equal(t, 3000, c.TargetPort)
205+
assert.Equal(t, 60*time.Second, c.HttpIdleTimeout)
206+
assert.Equal(t, true, c.XSendfileEnabled)
207+
}
208+
170209
func TestConfig_return_error_when_no_upstream_command(t *testing.T) {
171210
usingProgramArgs(t, "thruster")
172211

internal/service.go

Lines changed: 130 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,27 @@
11
package internal
22

33
import (
4+
"context"
45
"fmt"
56
"log/slog"
7+
"net/http"
68
"net/url"
79
"os"
10+
"os/signal"
11+
"syscall"
12+
"time"
813
)
914

1015
type Service struct {
1116
config *Config
1217
}
1318

19+
// Represents the result of the upstream process execution.
20+
type upstreamResult struct {
21+
exitCode int
22+
err error
23+
}
24+
1425
func NewService(config *Config) *Service {
1526
return &Service{
1627
config: config,
@@ -34,23 +45,135 @@ func (s *Service) Run() int {
3445
server := NewServer(s.config, handler)
3546
upstream := NewUpstreamProcess(s.config.UpstreamCommand, s.config.UpstreamArgs...)
3647

48+
s.setEnvironment()
49+
50+
// Channel to receive the result from the upstream process goroutine.
51+
resultChan := make(chan upstreamResult, 1)
52+
53+
// Run the upstream process in a separate goroutine.
54+
// This allows us to perform health checks while it starts up.
55+
go func() {
56+
exitCode, err := upstream.Run()
57+
resultChan <- upstreamResult{exitCode: exitCode, err: err}
58+
}()
59+
60+
// If a health check path is configured, wait for the upstream to become healthy.
61+
if s.config.HttpHealthPath != "" {
62+
if err := s.performHealthCheck(resultChan); err != nil {
63+
slog.Error("Upstream health check failed", "error", err)
64+
// At this point, the upstream process is running but unhealthy.
65+
upstream.Signal(syscall.SIGTERM)
66+
return 1
67+
}
68+
slog.Info("Upstream service is healthy, starting proxy server.")
69+
}
70+
71+
// Now that the upstream is ready, start the main proxy server
3772
if err := server.Start(); err != nil {
3873
return 1
3974
}
4075
defer server.Stop()
4176

42-
s.setEnvironment()
77+
// Delegate the waiting and signal handling to the new function
78+
return s.awaitTermination(upstream, resultChan)
79+
}
80+
81+
// Private
82+
83+
func (s *Service) awaitTermination(upstream *UpstreamProcess, resultChan <-chan upstreamResult) int {
84+
signalChan := make(chan os.Signal, 1)
85+
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
86+
87+
select {
88+
case result := <-resultChan:
89+
// The upstream process finished on its own.
90+
slog.Info("Wrapped process exited on its own.", "exit_code", result.exitCode)
91+
if result.err != nil {
92+
slog.Error("Wrapped process failed", "command", s.config.UpstreamCommand, "args", s.config.UpstreamArgs, "error", result.err)
93+
return 1
94+
}
95+
return result.exitCode
96+
97+
case sig := <-signalChan:
98+
// An OS signal was caught
99+
slog.Info("Received signal, shutting down.", "signal", sig.String())
100+
101+
// Relay the signal to the child process to allow for graceful shutdown.
102+
slog.Info("Relaying signal to upstream process...")
103+
if err := upstream.Signal(sig); err != nil {
104+
slog.Error("Failed to send signal to upstream process", "error", err)
105+
}
43106

44-
exitCode, err := upstream.Run()
45-
if err != nil {
46-
slog.Error("Failed to start wrapped process", "command", s.config.UpstreamCommand, "args", s.config.UpstreamArgs, "error", err)
107+
// Give the upstream process a moment to shut down gracefully
108+
// before the defer server.Stop() forcefully cleans up.
109+
select {
110+
case <-resultChan:
111+
slog.Info("Upstream process terminated gracefully after signal.")
112+
case <-time.After(10 * time.Second):
113+
slog.Warn("Upstream process did not terminate within 10 seconds of signal.")
114+
}
115+
116+
// Exit with a non-zero status code to indicate termination by signal.
47117
return 1
48118
}
49-
50-
return exitCode
51119
}
52120

53-
// Private
121+
// performHealthCheck polls the health check endpoint until it gets a 200 OK
122+
func (s *Service) performHealthCheck(resultChan <-chan upstreamResult) error {
123+
// Create a context with a 2-minute timeout for the entire health check process.
124+
ctx, cancel := context.WithTimeout(context.Background(), s.config.HttpHealthDeadline)
125+
defer cancel()
126+
127+
// We assume the upstream server binds to the target URL's host.
128+
// NOTE: You might need to adjust how this URL is constructed based on your config.
129+
healthCheckURL := fmt.Sprintf("http://%s:%d%s", s.config.HttpHealthHost, s.config.TargetPort, s.config.HttpHealthPath)
130+
slog.Info("Starting health checks", "url", healthCheckURL)
131+
132+
// Use a ticker to check every second.
133+
ticker := time.NewTicker(s.config.HttpHealthInterval)
134+
defer ticker.Stop()
135+
136+
// Create an HTTP client with a short timeout for individual requests.
137+
client := &http.Client{
138+
Timeout: s.config.HttpHealthTimeout,
139+
}
140+
141+
for {
142+
select {
143+
case <-ctx.Done():
144+
// Deadline exceeded.
145+
return fmt.Errorf("health check timed out after %v", s.config.HttpHealthDeadline)
146+
147+
case result := <-resultChan:
148+
// The upstream process exited before it became healthy.
149+
return fmt.Errorf("upstream process exited prematurely with code %d: %w", result.exitCode, result.err)
150+
151+
case <-ticker.C:
152+
// Ticker fired, time to perform a check.
153+
req, err := http.NewRequestWithContext(ctx, "GET", healthCheckURL, nil)
154+
if err != nil {
155+
return fmt.Errorf("failed to create health check request: %w", err)
156+
}
157+
158+
resp, err := client.Do(req)
159+
if err != nil {
160+
// This is expected while the server is starting up (e.g., "connection refused").
161+
slog.Debug("Health check attempt failed, retrying...", "error", err)
162+
continue
163+
}
164+
165+
// Don't forget to close the body.
166+
resp.Body.Close()
167+
168+
if resp.StatusCode == http.StatusOK {
169+
// Success!
170+
return nil
171+
}
172+
173+
slog.Debug("Health check received non-200 status", "status_code", resp.StatusCode)
174+
}
175+
}
176+
}
54177

55178
func (s *Service) cache() Cache {
56179
return NewMemoryCache(s.config.CacheSizeBytes, s.config.MaxCacheItemSizeBytes)

0 commit comments

Comments
 (0)