Skip to content

Commit 1859d74

Browse files
committed
TUN-8724: Add CLI command for diagnostic procedure
## Summary Adds a new CLI subcommand, `diag`, under the tunnel command. This command automatically collects several kinds of diagnostic data (logs, metrics, network information, system information, tunnel state, and runtime information) and writes them to a single zip file. Closes TUN-8724
1 parent 8ed1922 commit 1859d74

File tree

3 files changed

+184
-47
lines changed

3 files changed

+184
-47
lines changed

cmd/cloudflared/tunnel/cmd.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ func Commands() []*cli.Command {
236236
buildDeleteCommand(),
237237
buildCleanupCommand(),
238238
buildTokenCommand(),
239+
buildDiagCommand(),
239240
// for compatibility, allow following as tunnel subcommands
240241
proxydns.Command(true),
241242
cliutil.RemovedCommand("db-connect"),

cmd/cloudflared/tunnel/subcommands.go

Lines changed: 136 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,26 @@ import (
2828
"github.com/cloudflare/cloudflared/cmd/cloudflared/updater"
2929
"github.com/cloudflare/cloudflared/config"
3030
"github.com/cloudflare/cloudflared/connection"
31+
"github.com/cloudflare/cloudflared/diagnostic"
32+
"github.com/cloudflare/cloudflared/metrics"
3133
)
3234

3335
const (
34-
allSortByOptions = "name, id, createdAt, deletedAt, numConnections"
35-
connsSortByOptions = "id, startedAt, numConnections, version"
36-
CredFileFlagAlias = "cred-file"
37-
CredFileFlag = "credentials-file"
38-
CredContentsFlag = "credentials-contents"
39-
TunnelTokenFlag = "token"
40-
overwriteDNSFlagName = "overwrite-dns"
36+
allSortByOptions = "name, id, createdAt, deletedAt, numConnections"
37+
connsSortByOptions = "id, startedAt, numConnections, version"
38+
CredFileFlagAlias = "cred-file"
39+
CredFileFlag = "credentials-file"
40+
CredContentsFlag = "credentials-contents"
41+
TunnelTokenFlag = "token"
42+
overwriteDNSFlagName = "overwrite-dns"
43+
noDiagLogsFlagName = "no-diag-logs"
44+
noDiagMetricsFlagName = "no-diag-metrics"
45+
noDiagSystemFlagName = "no-diag-system"
46+
noDiagRuntimeFlagName = "no-diag-runtime"
47+
noDiagNetworkFlagName = "no-diag-network"
48+
diagContainerIDFlagName = "diag-container-id"
49+
diagPodFlagName = "diag-pod-id"
50+
metricsFlagName = "metrics"
4151

4252
LogFieldTunnelID = "tunnelID"
4353
)
@@ -179,6 +189,46 @@ var (
179189
Usage: "Source address and the interface name to send/receive ICMPv6 messages. If not provided cloudflared will dial a local address to determine the source IP or fallback to ::.",
180190
EnvVars: []string{"TUNNEL_ICMPV6_SRC"},
181191
}
192+
metricsFlag = &cli.StringFlag{
193+
Name: metricsFlagName,
194+
Usage: "The metrics server address i.e.: 127.0.0.1:12345. If your instance is running in a Docker/Kubernetes environment you need to setup port forwarding for your application.",
195+
Value: "",
196+
}
197+
diagContainerFlag = &cli.StringFlag{
198+
Name: diagContainerIDFlagName,
199+
Usage: "Container ID or Name to collect logs from",
200+
Value: "",
201+
}
202+
diagPodFlag = &cli.StringFlag{
203+
Name: diagPodFlagName,
204+
Usage: "Kubernetes POD to collect logs from",
205+
Value: "",
206+
}
207+
noDiagLogsFlag = &cli.BoolFlag{
208+
Name: noDiagLogsFlagName,
209+
Usage: "Log collection will not be performed",
210+
Value: false,
211+
}
212+
noDiagMetricsFlag = &cli.BoolFlag{
213+
Name: noDiagMetricsFlagName,
214+
Usage: "Metric collection will not be performed",
215+
Value: false,
216+
}
217+
noDiagSystemFlag = &cli.BoolFlag{
218+
Name: noDiagSystemFlagName,
219+
Usage: "System information collection will not be performed",
220+
Value: false,
221+
}
222+
noDiagRuntimeFlag = &cli.BoolFlag{
223+
Name: noDiagRuntimeFlagName,
224+
Usage: "Runtime information collection will not be performed",
225+
Value: false,
226+
}
227+
noDiagNetworkFlag = &cli.BoolFlag{
228+
Name: noDiagNetworkFlagName,
229+
Usage: "Network diagnostics won't be performed",
230+
Value: false,
231+
}
182232
)
183233

184234
func buildCreateCommand() *cli.Command {
@@ -375,7 +425,6 @@ func formatAndPrintTunnelList(tunnels []*cfapi.Tunnel, showRecentlyDisconnected
375425
}
376426

377427
func fmtConnections(connections []cfapi.Connection, showRecentlyDisconnected bool) string {
378-
379428
// Count connections per colo
380429
numConnsPerColo := make(map[string]uint, len(connections))
381430
for _, connection := range connections {
@@ -897,8 +946,10 @@ func lbRouteFromArg(c *cli.Context) (cfapi.HostnameRoute, error) {
897946
return cfapi.NewLBRoute(lbName, lbPool), nil
898947
}
899948

900-
var nameRegex = regexp.MustCompile("^[_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
901-
var hostNameRegex = regexp.MustCompile("^[*_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
949+
var (
950+
nameRegex = regexp.MustCompile("^[_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
951+
hostNameRegex = regexp.MustCompile("^[*_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
952+
)
902953

903954
func validateName(s string, allowWildcardSubdomain bool) bool {
904955
if allowWildcardSubdomain {
@@ -986,3 +1037,78 @@ SUBCOMMAND OPTIONS:
9861037
`
9871038
return fmt.Sprintf(template, parentFlagsHelp)
9881039
}
1040+
1041+
func buildDiagCommand() *cli.Command {
1042+
return &cli.Command{
1043+
Name: "diag",
1044+
Action: cliutil.ConfiguredAction(diagCommand),
1045+
Usage: "Creates a diagnostic report from a local cloudflared instance",
1046+
UsageText: "cloudflared tunnel [tunnel command options] diag [subcommand options]",
1047+
Description: "cloudflared tunnel diag will create a diagnostic report of a local cloudflared instance. The diagnostic procedure collects: logs, metrics, system information, traceroute to Cloudflare Edge, and runtime information. Since there may be multiple instances of cloudflared running the --metrics option may be provided to target a specific instance.",
1048+
Flags: []cli.Flag{
1049+
metricsFlag,
1050+
diagContainerFlag,
1051+
diagPodFlag,
1052+
noDiagLogsFlag,
1053+
noDiagMetricsFlag,
1054+
noDiagSystemFlag,
1055+
noDiagRuntimeFlag,
1056+
noDiagNetworkFlag,
1057+
},
1058+
CustomHelpTemplate: commandHelpTemplate(),
1059+
}
1060+
}
1061+
1062+
func diagCommand(ctx *cli.Context) error {
1063+
sctx, err := newSubcommandContext(ctx)
1064+
if err != nil {
1065+
return err
1066+
}
1067+
log := sctx.log
1068+
options := diagnostic.Options{
1069+
KnownAddresses: metrics.GetMetricsKnownAddresses(metrics.Runtime),
1070+
Address: sctx.c.String(metricsFlagName),
1071+
ContainerID: sctx.c.String(diagContainerIDFlagName),
1072+
PodID: sctx.c.String(diagPodFlagName),
1073+
Toggles: diagnostic.Toggles{
1074+
NoDiagLogs: sctx.c.Bool(noDiagLogsFlagName),
1075+
NoDiagMetrics: sctx.c.Bool(noDiagMetricsFlagName),
1076+
NoDiagSystem: sctx.c.Bool(noDiagSystemFlagName),
1077+
NoDiagRuntime: sctx.c.Bool(noDiagRuntimeFlagName),
1078+
NoDiagNetwork: sctx.c.Bool(noDiagNetworkFlagName),
1079+
},
1080+
}
1081+
1082+
if options.Address == "" {
1083+
log.Info().Msg("If your instance is running in a Docker/Kubernetes environment you need to setup port forwarding for your application.")
1084+
}
1085+
1086+
states, err := diagnostic.RunDiagnostic(log, options)
1087+
1088+
if errors.Is(err, diagnostic.ErrMetricsServerNotFound) {
1089+
log.Warn().Msg("No instances found")
1090+
return nil
1091+
}
1092+
if errors.Is(err, diagnostic.ErrMultipleMetricsServerFound) {
1093+
if states != nil {
1094+
log.Info().Msgf("Found multiple instances running:")
1095+
for _, state := range states {
1096+
log.Info().Msgf("Instance: tunnel-id=%s connector-id=%s metrics-address=%s", state.TunnelID, state.ConnectorID, state.URL.String())
1097+
}
1098+
log.Info().Msgf("To select one instance use the option --metrics")
1099+
}
1100+
return nil
1101+
}
1102+
1103+
if errors.Is(err, diagnostic.ErrLogConfigurationIsInvalid) {
1104+
log.Info().Msg("Couldn't extract logs from the instance. If the instance is running in a containerized environment use the option --diag-container-id or --diag-pod-id. If there is no logging configuration use --no-diag-logs.")
1105+
}
1106+
1107+
if err != nil {
1108+
log.Warn().Msg("Diagnostic completed with one or more errors")
1109+
} else {
1110+
log.Info().Msg("Diagnostic completed")
1111+
}
1112+
1113+
return nil
1114+
}

diagnostic/diagnostic.go

Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/url"
1010
"os"
1111
"path/filepath"
12+
"strings"
1213
"sync"
1314
"time"
1415

@@ -162,17 +163,7 @@ func collectNetworkResultRoutine(
162163
}
163164

164165
hops, raw, err := collector.Collect(ctx, network.NewTraceOptions(hopsNo, timeout, hostname, useIPv4))
165-
if err != nil {
166-
if raw == "" {
167-
// An error happened and there is no raw output
168-
results <- networkCollectionResult{name, nil, "", err}
169-
} else {
170-
// An error happened and there is raw output then write to file
171-
results <- networkCollectionResult{name, nil, raw, nil}
172-
}
173-
} else {
174-
results <- networkCollectionResult{name, hops, raw, nil}
175-
}
166+
results <- networkCollectionResult{name, hops, raw, err}
176167
}
177168

178169
func gatherNetworkInformation(ctx context.Context) map[string]networkCollectionResult {
@@ -209,10 +200,6 @@ func gatherNetworkInformation(ctx context.Context) map[string]networkCollectionR
209200

210201
for range len(hostAndIPversionPairs) {
211202
result := <-results
212-
if result.err != nil {
213-
continue
214-
}
215-
216203
resultMap[result.name] = result
217204
}
218205

@@ -249,22 +236,30 @@ func rawNetworkInformationWriter(resultMap map[string]networkCollectionResult) (
249236

250237
defer networkDumpHandle.Close()
251238

239+
var exitErr error
240+
252241
for k, v := range resultMap {
253-
_, err := networkDumpHandle.WriteString(k + "\n" + v.raw + "\n")
254-
if err != nil {
255-
return "", fmt.Errorf("error writing raw network information: %w", err)
242+
if v.err != nil {
243+
if exitErr == nil {
244+
exitErr = v.err
245+
}
246+
247+
_, err := networkDumpHandle.WriteString(k + "\nno content\n")
248+
if err != nil {
249+
return networkDumpHandle.Name(), fmt.Errorf("error writing 'no content' to raw network file: %w", err)
250+
}
251+
} else {
252+
_, err := networkDumpHandle.WriteString(k + "\n" + v.raw + "\n")
253+
if err != nil {
254+
return networkDumpHandle.Name(), fmt.Errorf("error writing raw network information: %w", err)
255+
}
256256
}
257257
}
258258

259-
return networkDumpHandle.Name(), nil
259+
return networkDumpHandle.Name(), exitErr
260260
}
261261

262262
func jsonNetworkInformationWriter(resultMap map[string]networkCollectionResult) (string, error) {
263-
jsonMap := make(map[string][]*network.Hop, len(resultMap))
264-
for k, v := range resultMap {
265-
jsonMap[k] = v.info
266-
}
267-
268263
networkDumpHandle, err := os.Create(filepath.Join(os.TempDir(), networkBaseName))
269264
if err != nil {
270265
return "", ErrCreatingTemporaryFile
@@ -274,12 +269,23 @@ func jsonNetworkInformationWriter(resultMap map[string]networkCollectionResult)
274269

275270
encoder := newFormattedEncoder(networkDumpHandle)
276271

272+
var exitErr error
273+
274+
jsonMap := make(map[string][]*network.Hop, len(resultMap))
275+
for k, v := range resultMap {
276+
jsonMap[k] = v.info
277+
278+
if exitErr == nil && v.err != nil {
279+
exitErr = v.err
280+
}
281+
}
282+
277283
err = encoder.Encode(jsonMap)
278284
if err != nil {
279-
return "", fmt.Errorf("error encoding network information results: %w", err)
285+
return networkDumpHandle.Name(), fmt.Errorf("error encoding network information results: %w", err)
280286
}
281287

282-
return networkDumpHandle.Name(), nil
288+
return networkDumpHandle.Name(), exitErr
283289
}
284290

285291
func collectFromEndpointAdapter(collect collectToWriterFunc, fileName string) collectFunc {
@@ -292,7 +298,7 @@ func collectFromEndpointAdapter(collect collectToWriterFunc, fileName string) co
292298

293299
err = collect(ctx, dumpHandle)
294300
if err != nil {
295-
return "", fmt.Errorf("error running collector: %w", err)
301+
return dumpHandle.Name(), fmt.Errorf("error running collector: %w", err)
296302
}
297303

298304
return dumpHandle.Name(), nil
@@ -316,8 +322,11 @@ func tunnelStateCollectEndpointAdapter(client HTTPClient, tunnel *TunnelState, f
316322
encoder := newFormattedEncoder(writer)
317323

318324
err := encoder.Encode(tunnel)
325+
if err != nil {
326+
return fmt.Errorf("error encoding tunnel state: %w", err)
327+
}
319328

320-
return fmt.Errorf("error encoding tunnel state: %w", err)
329+
return nil
321330
}
322331

323332
return collectFromEndpointAdapter(endpointFunc, fileName)
@@ -337,15 +346,14 @@ func resolveInstanceBaseURL(
337346
addresses []string,
338347
) (*url.URL, *TunnelState, []*AddressableTunnelState, error) {
339348
if metricsServerAddress != "" {
349+
if !strings.HasPrefix(metricsServerAddress, "http://") {
350+
metricsServerAddress = "http://" + metricsServerAddress
351+
}
340352
url, err := url.Parse(metricsServerAddress)
341353
if err != nil {
342354
return nil, nil, nil, fmt.Errorf("provided address is not valid: %w", err)
343355
}
344356

345-
if url.Scheme == "" {
346-
url.Scheme = "http://"
347-
}
348-
349357
return url, nil, nil, nil
350358
}
351359

@@ -526,9 +534,15 @@ func RunDiagnostic(
526534
jobsReport := runJobs(ctx, jobs, log)
527535
paths := make([]string, 0)
528536

537+
var gerr error
538+
529539
for _, v := range jobsReport {
530540
paths = append(paths, v.path)
531541

542+
if gerr == nil && v.Err != nil {
543+
gerr = v.Err
544+
}
545+
532546
defer func() {
533547
if !errors.Is(v.Err, ErrCreatingTemporaryFile) {
534548
os.Remove(v.path)
@@ -538,14 +552,10 @@ func RunDiagnostic(
538552

539553
zipfile, err := CreateDiagnosticZipFile(zipName, paths)
540554
if err != nil {
541-
if zipfile != "" {
542-
os.Remove(zipfile)
543-
}
544-
545555
return nil, err
546556
}
547557

548558
log.Info().Msgf("Diagnostic file written: %v", zipfile)
549559

550-
return nil, nil
560+
return nil, gerr
551561
}

0 commit comments

Comments
 (0)