Skip to content

Commit aab5364

Browse files
committed
TUN-8731: Implement diag/system endpoint
## Summary This PR adds a new endpoint, "diag/system", to the metrics server; the endpoint collects system information from different operating systems. Closes TUN-8731
1 parent e2c2b01 commit aab5364

File tree

12 files changed

+1542
-0
lines changed

12 files changed

+1542
-0
lines changed

cmd/cloudflared/tunnel/cmd.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"github.com/cloudflare/cloudflared/config"
2929
"github.com/cloudflare/cloudflared/connection"
3030
"github.com/cloudflare/cloudflared/credentials"
31+
"github.com/cloudflare/cloudflared/diagnostic"
3132
"github.com/cloudflare/cloudflared/edgediscovery"
3233
"github.com/cloudflare/cloudflared/features"
3334
"github.com/cloudflare/cloudflared/ingress"
@@ -463,8 +464,10 @@ func StartServer(
463464
readinessServer := metrics.NewReadyServer(clientID,
464465
tunnelstate.NewConnTracker(log))
465466
observer.RegisterSink(readinessServer)
467+
diagnosticHandler := diagnostic.NewDiagnosticHandler(log, 0, diagnostic.NewSystemCollectorImpl(buildInfo.CloudflaredVersion))
466468
metricsConfig := metrics.Config{
467469
ReadyServer: readinessServer,
470+
DiagnosticHandler: diagnosticHandler,
468471
QuickTunnelHostname: quickTunnelURL,
469472
Orchestrator: orchestrator,
470473
}

diagnostic/consts.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package diagnostic
2+
3+
import "time"
4+
5+
const (
	// defaultCollectorTimeout is the timeout applied to a collector operation
	// when the caller does not supply one.
	defaultCollectorTimeout = 10 * time.Second
	// collectorField is a field name used for logging purposes.
	collectorField = "collector"
	// systemCollectorName is the name of the system collector, used for
	// logging purposes.
	systemCollectorName = "system"
)

diagnostic/error.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package diagnostic
2+
3+
import (
4+
"errors"
5+
)
6+
7+
var (
	// ErrInsufficientLines is the error used when parsing the lines of the
	// output of a collector.
	ErrInsufficientLines = errors.New("insufficient lines")
	// ErrInsufficientFields is the error used when parsing the fields of the
	// output of a collector.
	ErrInsufficientFields = errors.New("insufficient fields")
	// ErrInsuficientFields is the original, misspelled name of
	// ErrInsufficientFields. Both names refer to the same error value, so
	// comparisons against either one keep working.
	//
	// Deprecated: use ErrInsufficientFields.
	ErrInsuficientFields = ErrInsufficientFields
	// ErrKeyNotFound is the error used when a given key is not found while
	// parsing key/value output.
	ErrKeyNotFound = errors.New("key not found")
	// ErrNoVolumeFound is the error used when there is no disk volume
	// information available.
	ErrNoVolumeFound = errors.New("no disk volume information found")
)

diagnostic/handlers.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package diagnostic
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"net/http"
7+
"time"
8+
9+
"github.com/rs/zerolog"
10+
)
11+
12+
type Handler struct {
13+
log *zerolog.Logger
14+
timeout time.Duration
15+
systemCollector SystemCollector
16+
}
17+
18+
func NewDiagnosticHandler(
19+
log *zerolog.Logger,
20+
timeout time.Duration,
21+
systemCollector SystemCollector,
22+
) *Handler {
23+
if timeout == 0 {
24+
timeout = defaultCollectorTimeout
25+
}
26+
27+
return &Handler{
28+
log,
29+
timeout,
30+
systemCollector,
31+
}
32+
}
33+
34+
func (handler *Handler) SystemHandler(writer http.ResponseWriter, request *http.Request) {
35+
logger := handler.log.With().Str(collectorField, systemCollectorName).Logger()
36+
logger.Info().Msg("Collection started")
37+
38+
defer func() {
39+
logger.Info().Msg("Collection finished")
40+
}()
41+
42+
ctx, cancel := context.WithTimeout(request.Context(), handler.timeout)
43+
44+
defer cancel()
45+
46+
info, rawInfo, err := handler.systemCollector.Collect(ctx)
47+
if err != nil {
48+
logger.Error().Err(err).Msg("error occurred whilst collecting system information")
49+
50+
if rawInfo != "" {
51+
logger.Info().Msg("using raw information fallback")
52+
bytes := []byte(rawInfo)
53+
writeResponse(writer, bytes, &logger)
54+
} else {
55+
logger.Error().Msg("no raw information available")
56+
writer.WriteHeader(http.StatusInternalServerError)
57+
}
58+
59+
return
60+
}
61+
62+
if info == nil {
63+
logger.Error().Msgf("system information collection is nil")
64+
writer.WriteHeader(http.StatusInternalServerError)
65+
}
66+
67+
encoder := json.NewEncoder(writer)
68+
69+
err = encoder.Encode(info)
70+
if err != nil {
71+
logger.Error().Err(err).Msgf("error occurred whilst serializing information")
72+
writer.WriteHeader(http.StatusInternalServerError)
73+
}
74+
}
75+
76+
func writeResponse(writer http.ResponseWriter, bytes []byte, logger *zerolog.Logger) {
77+
bytesWritten, err := writer.Write(bytes)
78+
if err != nil {
79+
logger.Error().Err(err).Msg("error occurred writing response")
80+
} else if bytesWritten != len(bytes) {
81+
logger.Error().Msgf("error incomplete write response %d/%d", bytesWritten, len(bytes))
82+
}
83+
}

diagnostic/handlers_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package diagnostic_test
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"errors"
7+
"io"
8+
"net/http"
9+
"net/http/httptest"
10+
"testing"
11+
12+
"github.com/rs/zerolog"
13+
"github.com/stretchr/testify/assert"
14+
"github.com/stretchr/testify/require"
15+
16+
"github.com/cloudflare/cloudflared/diagnostic"
17+
)
18+
19+
// SystemCollectorMock is a stateless test double whose Collect method returns
// the values stored in the context it is given.
type SystemCollectorMock struct{}
20+
21+
// systemCollectorCtxKey is the type of the context keys used to pass the
// mock's canned results. A dedicated type prevents collisions with keys set
// by other packages, which plain string keys cannot guarantee.
type systemCollectorCtxKey string

const (
	systemInformationKey systemCollectorCtxKey = "sikey"  // key of the *diagnostic.SystemInformation to return
	rawInformationKey    systemCollectorCtxKey = "rikey"  // key of the raw information string to return
	errorKey             systemCollectorCtxKey = "errkey" // key of the error to return
)
26+
27+
func setCtxValuesForSystemCollector(
28+
systemInfo *diagnostic.SystemInformation,
29+
rawInfo string,
30+
err error,
31+
) context.Context {
32+
ctx := context.Background()
33+
ctx = context.WithValue(ctx, systemInformationKey, systemInfo)
34+
ctx = context.WithValue(ctx, rawInformationKey, rawInfo)
35+
ctx = context.WithValue(ctx, errorKey, err)
36+
37+
return ctx
38+
}
39+
40+
func (*SystemCollectorMock) Collect(ctx context.Context) (*diagnostic.SystemInformation, string, error) {
41+
si, _ := ctx.Value(systemInformationKey).(*diagnostic.SystemInformation)
42+
ri, _ := ctx.Value(rawInformationKey).(string)
43+
err, _ := ctx.Value(errorKey).(error)
44+
45+
return si, ri, err
46+
}
47+
48+
func TestSystemHandler(t *testing.T) {
49+
t.Parallel()
50+
51+
log := zerolog.Nop()
52+
tests := []struct {
53+
name string
54+
systemInfo *diagnostic.SystemInformation
55+
rawInfo string
56+
err error
57+
statusCode int
58+
}{
59+
{
60+
name: "happy path",
61+
systemInfo: diagnostic.NewSystemInformation(
62+
0, 0, 0, 0,
63+
"string", "string", "string", "string",
64+
"string", "string", nil,
65+
),
66+
rawInfo: "",
67+
err: nil,
68+
statusCode: http.StatusOK,
69+
},
70+
{
71+
name: "on error and raw info", systemInfo: nil,
72+
rawInfo: "raw info", err: errors.New("an error"), statusCode: http.StatusOK,
73+
},
74+
{
75+
name: "on error and no raw info", systemInfo: nil,
76+
rawInfo: "", err: errors.New("an error"), statusCode: http.StatusInternalServerError,
77+
},
78+
{
79+
name: "malformed response", systemInfo: nil, rawInfo: "", err: nil, statusCode: http.StatusInternalServerError,
80+
},
81+
}
82+
83+
for _, tCase := range tests {
84+
t.Run(tCase.name, func(t *testing.T) {
85+
t.Parallel()
86+
handler := diagnostic.NewDiagnosticHandler(&log, 0, &SystemCollectorMock{})
87+
recorder := httptest.NewRecorder()
88+
ctx := setCtxValuesForSystemCollector(tCase.systemInfo, tCase.rawInfo, tCase.err)
89+
request, err := http.NewRequestWithContext(ctx, http.MethodGet, "/diag/syste,", nil)
90+
require.NoError(t, err)
91+
handler.SystemHandler(recorder, request)
92+
93+
assert.Equal(t, tCase.statusCode, recorder.Code)
94+
if tCase.statusCode == http.StatusOK && tCase.systemInfo != nil {
95+
var response diagnostic.SystemInformation
96+
97+
decoder := json.NewDecoder(recorder.Body)
98+
err = decoder.Decode(&response)
99+
require.NoError(t, err)
100+
assert.Equal(t, tCase.systemInfo, &response)
101+
} else if tCase.statusCode == http.StatusOK && tCase.rawInfo != "" {
102+
rawBytes, err := io.ReadAll(recorder.Body)
103+
require.NoError(t, err)
104+
assert.Equal(t, tCase.rawInfo, string(rawBytes))
105+
}
106+
})
107+
}
108+
}

diagnostic/system_collector.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package diagnostic
2+
3+
import "context"
4+
5+
// DiskVolumeInformation holds the name and size figures of one disk volume.
type DiskVolumeInformation struct {
	Name        string `json:"name"`        // represents the filesystem in linux/macos or device name in windows
	SizeMaximum uint64 `json:"sizeMaximum"` // represents the maximum size of the disk in kilobytes
	SizeCurrent uint64 `json:"sizeCurrent"` // represents the current size of the disk in kilobytes
}

// NewDiskVolumeInformation returns a DiskVolumeInformation with Name set to
// name, SizeMaximum set to maximum and SizeCurrent set to current.
func NewDiskVolumeInformation(name string, maximum, current uint64) *DiskVolumeInformation {
	// Keyed fields: SizeMaximum and SizeCurrent share a type, so a positional
	// literal would silently swap them if the struct were ever reordered.
	return &DiskVolumeInformation{
		Name:        name,
		SizeMaximum: maximum,
		SizeCurrent: current,
	}
}
18+
19+
type SystemInformation struct {
20+
MemoryMaximum uint64 `json:"memoryMaximum"` // represents the maximum memory of the system in kilobytes
21+
MemoryCurrent uint64 `json:"memoryCurrent"` // represents the system's memory in use in kilobytes
22+
FileDescriptorMaximum uint64 `json:"fileDescriptorMaximum"` // represents the maximum number of file descriptors of the system
23+
FileDescriptorCurrent uint64 `json:"fileDescriptorCurrent"` // represents the system's file descriptors in use
24+
OsSystem string `json:"osSystem"` // represents the operating system name i.e.: linux, windows, darwin
25+
HostName string `json:"hostName"` // represents the system host name
26+
OsVersion string `json:"osVersion"` // detailed information about the system's release version level
27+
OsRelease string `json:"osRelease"` // detailed information about the system's release
28+
Architecture string `json:"architecture"` // represents the system's hardware platform i.e: arm64/amd64
29+
CloudflaredVersion string `json:"cloudflaredVersion"` // the runtime version of cloudflared
30+
Disk []*DiskVolumeInformation `json:"disk"`
31+
}
32+
33+
func NewSystemInformation(
34+
memoryMaximum,
35+
memoryCurrent,
36+
filesMaximum,
37+
filesCurrent uint64,
38+
osystem,
39+
name,
40+
osVersion,
41+
osRelease,
42+
architecture,
43+
cloudflaredVersion string,
44+
disk []*DiskVolumeInformation,
45+
) *SystemInformation {
46+
return &SystemInformation{
47+
memoryMaximum,
48+
memoryCurrent,
49+
filesMaximum,
50+
filesCurrent,
51+
osystem,
52+
name,
53+
osVersion,
54+
osRelease,
55+
architecture,
56+
cloudflaredVersion,
57+
disk,
58+
}
59+
}
60+
61+
type SystemCollector interface {
62+
// If the collection is successful it will return `SystemInformation` struct,
63+
// an empty string, and a nil error.
64+
// In case there is an error a string with the raw data will be returned
65+
// however the returned string not contain all the data points.
66+
//
67+
// This function expects that the caller sets the context timeout to prevent
68+
// long-lived collectors.
69+
Collect(ctx context.Context) (*SystemInformation, string, error)
70+
}

0 commit comments

Comments
 (0)