@@ -10,6 +10,7 @@ import (
1010 "context"
1111 "fmt"
1212 "html/template"
13+ "net/http"
1314 "net/http/httptest"
1415 "os"
1516 "os/exec"
@@ -23,6 +24,7 @@ import (
2324 "github.com/stretchr/testify/suite"
2425
2526 "github.com/elastic/elastic-agent-client/v7/pkg/client"
27+ "github.com/elastic/fleet-server/pkg/api"
2628 "github.com/elastic/fleet-server/testing/e2e/api_version"
2729 "github.com/elastic/fleet-server/testing/e2e/scaffold"
2830 "github.com/elastic/fleet-server/v7/version"
@@ -364,6 +366,135 @@ func (suite *StandAloneSuite) TestElasticsearch429OnStartup() {
364366 cmd .Wait ()
365367}
366368
369+ // TestElasticsearch503OnStartup will check to ensure fleet-server functions as expected (does not crash)
370+ // if Elasticsearch returns 503s on startup.
371+ func (suite * StandAloneSuite ) TestElasticsearch503OnStartup () {
372+ ctx , cancel := context .WithTimeout (context .Background (), time .Minute )
373+
374+ // Create a proxy that returns 503s
375+ proxy := NewStatusProxy (suite .T (), http .StatusServiceUnavailable )
376+ proxy .Enable ()
377+ server := httptest .NewServer (proxy )
378+
379+ // Create a config file from a template in the test temp dir
380+ dir := suite .T ().TempDir ()
381+ tpl , err := template .ParseFiles (filepath .Join ("testdata" , "stand-alone-http-proxy.tpl" ))
382+ suite .Require ().NoError (err )
383+ f , err := os .Create (filepath .Join (dir , "config.yml" ))
384+ suite .Require ().NoError (err )
385+ err = tpl .Execute (f , map [string ]string {
386+ "Hosts" : suite .ESHosts ,
387+ "ServiceToken" : suite .ServiceToken ,
388+ "Proxy" : server .URL ,
389+ })
390+ f .Close ()
391+ suite .Require ().NoError (err )
392+
393+ // Run the fleet-server binary
394+ cmd := exec .CommandContext (ctx , suite .binaryPath , "-c" , filepath .Join (dir , "config.yml" ))
395+ //cmd.Stderr = os.Stderr // NOTE: This can be uncommented to put out logs
396+ cmd .Cancel = func () error {
397+ return cmd .Process .Signal (syscall .SIGTERM )
398+ }
399+ cmd .Env = []string {"GOCOVERDIR=" + suite .CoverPath }
400+ suite .T ().Log ("Starting fleet-server" )
401+ err = cmd .Start ()
402+ suite .Require ().NoError (err )
403+
404+ // FIXME timeout to make sure fleet-server has started
405+ time .Sleep (5 * time .Second )
406+ suite .T ().Log ("Checking fleet-server status" )
407+ // Wait to check that it is Starting.
408+ suite .FleetServerStatusIs (ctx , "http://localhost:8220" , client .UnitStateStarting ) // fleet-server returns 503:starting if upstream ES returns 429.
409+
410+ // Disable proxy and ensure fleet-server recovers
411+ suite .T ().Log ("Disable proxy" )
412+ proxy .Disable ()
413+ suite .FleetServerStatusIs (ctx , "http://localhost:8220" , client .UnitStateHealthy )
414+
415+ cancel ()
416+ cmd .Wait ()
417+ }
418+
419+ // TestElasticsearch503OnEnroll will check to ensure fleet-server returns a 503 error when elasticsearch returns a
420+ // 503 gateway error.
421+ func (suite * StandAloneSuite ) TestElasticsearch503OnEnroll () {
422+ ctx , cancel := context .WithTimeout (context .Background (), time .Minute )
423+
424+ // Create a proxy that returns 503s
425+ proxy := NewStatusProxy (suite .T (), http .StatusServiceUnavailable )
426+ proxy .Disable () // start off
427+ server := httptest .NewServer (proxy )
428+
429+ // Create a config file from a template in the test temp dir
430+ dir := suite .T ().TempDir ()
431+ tpl , err := template .ParseFiles (filepath .Join ("testdata" , "stand-alone-http-proxy.tpl" ))
432+ suite .Require ().NoError (err )
433+ f , err := os .Create (filepath .Join (dir , "config.yml" ))
434+ suite .Require ().NoError (err )
435+ err = tpl .Execute (f , map [string ]any {
436+ "Hosts" : suite .ESHosts ,
437+ "ServiceToken" : suite .ServiceToken ,
438+ "Proxy" : server .URL ,
439+ "StaticPolicyTokenEnabled" : true ,
440+ "StaticTokenKey" : "abcdefg" ,
441+ "StaticPolicyID" : "dummy-policy" ,
442+ })
443+ f .Close ()
444+ suite .Require ().NoError (err )
445+
446+ // Run the fleet-server binary
447+ cmd := exec .CommandContext (ctx , suite .binaryPath , "-c" , filepath .Join (dir , "config.yml" ))
448+ cmd .Stderr = os .Stderr // NOTE: This can be uncommented to put out logs
449+ cmd .Cancel = func () error {
450+ return cmd .Process .Signal (syscall .SIGTERM )
451+ }
452+ cmd .Env = []string {"GOCOVERDIR=" + suite .CoverPath }
453+ suite .T ().Log ("Starting fleet-server" )
454+ err = cmd .Start ()
455+ suite .Require ().NoError (err )
456+ defer func () {
457+ cancel ()
458+ cmd .Wait ()
459+ }()
460+
461+ // FIXME timeout to make sure fleet-server has started
462+ time .Sleep (5 * time .Second )
463+ suite .T ().Log ("Checking fleet-server status" )
464+ // Should start healthy as the proxy is disabled.
465+ suite .FleetServerStatusIs (ctx , "http://localhost:8220" , client .UnitStateHealthy )
466+
467+ // Ensure enrollment works correctly
468+ suite .T ().Log ("Checking enrollment works" )
469+ enrollmentToken := suite .GetEnrollmentTokenForPolicyID (ctx , "dummy-policy" )
470+ tester := api_version .NewClientAPITesterCurrent (
471+ suite .Scaffold ,
472+ "http://localhost:8220" ,
473+ enrollmentToken ,
474+ )
475+ tester .Enroll (ctx , enrollmentToken )
476+
477+ // Enable the proxy which will cause enrollment to fail
478+ suite .T ().Log ("Force 503 error from proxy" )
479+ proxy .Enable ()
480+
481+ // Perform enrollment again should error with 503
482+ suite .T ().Log ("Perform enrollment again" )
483+ client , err := api .NewClientWithResponses ("http://localhost:8220" , api .WithHTTPClient (tester .Client ), api .WithRequestEditorFn (func (ctx context.Context , req * http.Request ) error {
484+ req .Header .Set ("Authorization" , "ApiKey " + enrollmentToken )
485+ return nil
486+ }))
487+ tester .Require ().NoError (err )
488+ enrollResp , err := client .AgentEnrollWithResponse (ctx ,
489+ & api.AgentEnrollParams {UserAgent : "elastic agent " + version .DefaultVersion },
490+ api.AgentEnrollJSONRequestBody {
491+ Type : api .PERMANENT ,
492+ },
493+ )
494+ tester .Require ().NoError (err )
495+ tester .Require ().Equal (http .StatusServiceUnavailable , enrollResp .StatusCode ())
496+ }
497+
367498// TestElasticsearchTimeoutOnStartup will check to ensure fleet-server functions as expected (does not crash)
368499// if Elasticsearch times out on startup.
369500func (suite * StandAloneSuite ) TestElasticsearchTimeoutOnStartup () {
0 commit comments