Skip to content

Commit c5f7f37

Browse files
authored
Fix enroll to return 503 when ES returns 503. (#5232)
* Fix enroll to return 503 when ES returns 503. * Fix import. * Add changelog. * Wrap error in invalidate.
1 parent 934f652 commit c5f7f37

File tree

4 files changed

+169
-2
lines changed

4 files changed

+169
-2
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: Fix 503 handling in enrollment
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: fleet-server
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
pr: https://github.com/elastic/fleet-server/pull/5232
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
32+
issue: https://github.com/elastic/fleet-server/issues/5197

internal/pkg/apikey/create.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212

1313
"github.com/elastic/go-elasticsearch/v8"
1414
"github.com/elastic/go-elasticsearch/v8/esapi"
15+
16+
"github.com/elastic/fleet-server/v7/internal/pkg/es"
1517
)
1618

1719
// Create generates a new APIKey in Elasticsearch using the given client.
@@ -48,7 +50,7 @@ func Create(ctx context.Context, client *elasticsearch.Client, name, ttl, refres
4850
defer res.Body.Close()
4951

5052
if res.IsError() {
51-
return nil, fmt.Errorf("fail CreateAPIKey: %s", res.String())
53+
return nil, fmt.Errorf("fail CreateAPIKey: %w", es.TranslateError(res.StatusCode, nil))
5254
}
5355

5456
type APIKeyResponse struct {

internal/pkg/apikey/invalidate.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212

1313
"github.com/elastic/go-elasticsearch/v8"
1414
"github.com/elastic/go-elasticsearch/v8/esapi"
15+
16+
"github.com/elastic/fleet-server/v7/internal/pkg/es"
1517
)
1618

1719
// Invalidate invalidates the provided API keys by ID.
@@ -44,7 +46,7 @@ func Invalidate(ctx context.Context, client *elasticsearch.Client, ids ...string
4446
defer res.Body.Close()
4547

4648
if res.IsError() {
47-
return fmt.Errorf("fail InvalidateAPIKey: %s", res.String())
49+
return fmt.Errorf("fail InvalidateAPIKey: %w", es.TranslateError(res.StatusCode, nil))
4850
}
4951

5052
return nil

testing/e2e/stand_alone_test.go

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"context"
1111
"fmt"
1212
"html/template"
13+
"net/http"
1314
"net/http/httptest"
1415
"os"
1516
"os/exec"
@@ -23,6 +24,7 @@ import (
2324
"github.com/stretchr/testify/suite"
2425

2526
"github.com/elastic/elastic-agent-client/v7/pkg/client"
27+
"github.com/elastic/fleet-server/pkg/api"
2628
"github.com/elastic/fleet-server/testing/e2e/api_version"
2729
"github.com/elastic/fleet-server/testing/e2e/scaffold"
2830
"github.com/elastic/fleet-server/v7/version"
@@ -364,6 +366,135 @@ func (suite *StandAloneSuite) TestElasticsearch429OnStartup() {
364366
cmd.Wait()
365367
}
366368

369+
// TestElasticsearch503OnStartup will check to ensure fleet-server functions as expected (does not crash)
370+
// if Elasticsearch returns 503s on startup.
371+
func (suite *StandAloneSuite) TestElasticsearch503OnStartup() {
372+
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
373+
374+
// Create a proxy that returns 503s
375+
proxy := NewStatusProxy(suite.T(), http.StatusServiceUnavailable)
376+
proxy.Enable()
377+
server := httptest.NewServer(proxy)
378+
379+
// Create a config file from a template in the test temp dir
380+
dir := suite.T().TempDir()
381+
tpl, err := template.ParseFiles(filepath.Join("testdata", "stand-alone-http-proxy.tpl"))
382+
suite.Require().NoError(err)
383+
f, err := os.Create(filepath.Join(dir, "config.yml"))
384+
suite.Require().NoError(err)
385+
err = tpl.Execute(f, map[string]string{
386+
"Hosts": suite.ESHosts,
387+
"ServiceToken": suite.ServiceToken,
388+
"Proxy": server.URL,
389+
})
390+
f.Close()
391+
suite.Require().NoError(err)
392+
393+
// Run the fleet-server binary
394+
cmd := exec.CommandContext(ctx, suite.binaryPath, "-c", filepath.Join(dir, "config.yml"))
395+
//cmd.Stderr = os.Stderr // NOTE: This can be uncommented to put out logs
396+
cmd.Cancel = func() error {
397+
return cmd.Process.Signal(syscall.SIGTERM)
398+
}
399+
cmd.Env = []string{"GOCOVERDIR=" + suite.CoverPath}
400+
suite.T().Log("Starting fleet-server")
401+
err = cmd.Start()
402+
suite.Require().NoError(err)
403+
404+
// FIXME timeout to make sure fleet-server has started
405+
time.Sleep(5 * time.Second)
406+
suite.T().Log("Checking fleet-server status")
407+
// Wait to check that it is Starting.
408+
suite.FleetServerStatusIs(ctx, "http://localhost:8220", client.UnitStateStarting) // fleet-server returns 503:starting if upstream ES returns 429.
409+
410+
// Disable proxy and ensure fleet-server recovers
411+
suite.T().Log("Disable proxy")
412+
proxy.Disable()
413+
suite.FleetServerStatusIs(ctx, "http://localhost:8220", client.UnitStateHealthy)
414+
415+
cancel()
416+
cmd.Wait()
417+
}
418+
419+
// TestElasticsearch503OnEnroll will check to ensure fleet-server returns a 503 error when elasticsearch returns a
420+
// 503 gateway error.
421+
func (suite *StandAloneSuite) TestElasticsearch503OnEnroll() {
422+
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
423+
424+
// Create a proxy that returns 503s
425+
proxy := NewStatusProxy(suite.T(), http.StatusServiceUnavailable)
426+
proxy.Disable() // start off
427+
server := httptest.NewServer(proxy)
428+
429+
// Create a config file from a template in the test temp dir
430+
dir := suite.T().TempDir()
431+
tpl, err := template.ParseFiles(filepath.Join("testdata", "stand-alone-http-proxy.tpl"))
432+
suite.Require().NoError(err)
433+
f, err := os.Create(filepath.Join(dir, "config.yml"))
434+
suite.Require().NoError(err)
435+
err = tpl.Execute(f, map[string]any{
436+
"Hosts": suite.ESHosts,
437+
"ServiceToken": suite.ServiceToken,
438+
"Proxy": server.URL,
439+
"StaticPolicyTokenEnabled": true,
440+
"StaticTokenKey": "abcdefg",
441+
"StaticPolicyID": "dummy-policy",
442+
})
443+
f.Close()
444+
suite.Require().NoError(err)
445+
446+
// Run the fleet-server binary
447+
cmd := exec.CommandContext(ctx, suite.binaryPath, "-c", filepath.Join(dir, "config.yml"))
448+
cmd.Stderr = os.Stderr // NOTE: This can be uncommented to put out logs
449+
cmd.Cancel = func() error {
450+
return cmd.Process.Signal(syscall.SIGTERM)
451+
}
452+
cmd.Env = []string{"GOCOVERDIR=" + suite.CoverPath}
453+
suite.T().Log("Starting fleet-server")
454+
err = cmd.Start()
455+
suite.Require().NoError(err)
456+
defer func() {
457+
cancel()
458+
cmd.Wait()
459+
}()
460+
461+
// FIXME timeout to make sure fleet-server has started
462+
time.Sleep(5 * time.Second)
463+
suite.T().Log("Checking fleet-server status")
464+
// Should start healthy as the proxy is disabled.
465+
suite.FleetServerStatusIs(ctx, "http://localhost:8220", client.UnitStateHealthy)
466+
467+
// Ensure enrollment works correctly
468+
suite.T().Log("Checking enrollment works")
469+
enrollmentToken := suite.GetEnrollmentTokenForPolicyID(ctx, "dummy-policy")
470+
tester := api_version.NewClientAPITesterCurrent(
471+
suite.Scaffold,
472+
"http://localhost:8220",
473+
enrollmentToken,
474+
)
475+
tester.Enroll(ctx, enrollmentToken)
476+
477+
// Enable the proxy which will cause enrollment to fail
478+
suite.T().Log("Force 503 error from proxy")
479+
proxy.Enable()
480+
481+
// Perform enrollment again should error with 503
482+
suite.T().Log("Perform enrollment again")
483+
client, err := api.NewClientWithResponses("http://localhost:8220", api.WithHTTPClient(tester.Client), api.WithRequestEditorFn(func(ctx context.Context, req *http.Request) error {
484+
req.Header.Set("Authorization", "ApiKey "+enrollmentToken)
485+
return nil
486+
}))
487+
tester.Require().NoError(err)
488+
enrollResp, err := client.AgentEnrollWithResponse(ctx,
489+
&api.AgentEnrollParams{UserAgent: "elastic agent " + version.DefaultVersion},
490+
api.AgentEnrollJSONRequestBody{
491+
Type: api.PERMANENT,
492+
},
493+
)
494+
tester.Require().NoError(err)
495+
tester.Require().Equal(http.StatusServiceUnavailable, enrollResp.StatusCode())
496+
}
497+
367498
// TestElasticsearchTimeoutOnStartup will check to ensure fleet-server functions as expected (does not crash)
368499
// if Elasticsearch times out on startup.
369500
func (suite *StandAloneSuite) TestElasticsearchTimeoutOnStartup() {

0 commit comments

Comments
 (0)