Skip to content

Commit 977fc43

Browse files
authored
Remote host collectors (#392)
* Add collect command and remote host collectors Adds the ability to run a host collector on a set of remote k8s nodes. Target nodes can be filtered using the --selector flag, with the same syntax as kubectl. Existing flags for --collector-image, --collector-pullpolicy and --request-timeout are used. To run on a specified node, --selector="kubernetes.io/hostname=kind-worker2" could be used. The collect command is used by the remote collector to output the results using a "raw" format, which uses the filename as the key and the collector output, as an escaped JSON string, as the value. When run manually it defaults to fully decoded JSON. The existing block devices, ipv4interfaces and services host collectors don't decode properly - the fix is to convert their slice output to a map (fix not included as unsure what depends on the existing format). The collect command is also useful for troubleshooting preflight issues. Examples are included to show remote collector usage. ``` bin/collect --collector-image=croomes/troubleshoot:latest examples/collect/remote/memory.yaml --namespace test { "kind-control-plane": { "system/memory.json": { "total": 1304207360 } }, "kind-worker": { "system/memory.json": { "total": 1695780864 } }, "kind-worker2": { "system/memory.json": { "total": 1726353408 } } } ``` The preflight command has been updated to run remote collectors. 
To run a host collector remotely it must be specified in the spec as a `remoteCollector`: ``` apiVersion: troubleshoot.sh/v1beta2 kind: HostPreflight metadata: name: memory spec: remoteCollectors: - memory: collectorName: memory analyzers: - memory: outcomes: - fail: when: "< 8Gi" message: At least 8Gi of memory is required - warn: when: "< 32Gi" message: At least 32Gi of memory is recommended - pass: message: The system has as sufficient memory ``` Results for each node are analyzed separately, with the node name appended to the title: ``` bin/preflight --interactive=false --collector-image=croomes/troubleshoot:latest examples/preflight/remote/memory.yaml --format=json {memory running 0 1} {memory completed 1 1} { "fail": [ { "title": "Amount of Memory (kind-worker2)", "message": "At least 8Gi of memory is required" }, { "title": "Amount of Memory (kind-worker)", "message": "At least 8Gi of memory is required" }, { "title": "Amount of Memory (kind-control-plane)", "message": "At least 8Gi of memory is required" } ] } ``` Also added a host collector to allow preflight checks of required kernel modules, which is the main driver for this change.
1 parent 4d52760 commit 977fc43

File tree

136 files changed

+11235
-70
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

136 files changed

+11235
-70
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ preflight: generate fmt vet
5858
.PHONY: analyze
5959
analyze: generate fmt vet
6060
go build ${BUILDFLAGS} ${LDFLAGS} -o bin/analyze github.com/replicatedhq/troubleshoot/cmd/analyze
61+
62+
.PHONY: collect
63+
collect: generate fmt vet
64+
go build ${BUILDFLAGS} ${LDFLAGS} -o bin/collect github.com/replicatedhq/troubleshoot/cmd/collect
6165

6266
.PHONY: fmt
6367
fmt:

cmd/collect/cli/root.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package cli
2+
3+
import (
4+
"os"
5+
"strings"
6+
7+
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
8+
"github.com/replicatedhq/troubleshoot/pkg/logger"
9+
"github.com/spf13/cobra"
10+
"github.com/spf13/viper"
11+
)
12+
13+
func RootCmd() *cobra.Command {
14+
cmd := &cobra.Command{
15+
Use: "collect [url]",
16+
Args: cobra.MinimumNArgs(1),
17+
Short: "Run a collector",
18+
Long: `Run a collector and output the results.`,
19+
SilenceUsage: true,
20+
PreRun: func(cmd *cobra.Command, args []string) {
21+
viper.BindPFlags(cmd.Flags())
22+
},
23+
RunE: func(cmd *cobra.Command, args []string) error {
24+
v := viper.GetViper()
25+
26+
logger.SetQuiet(v.GetBool("quiet"))
27+
return runCollect(v, args[0])
28+
},
29+
}
30+
31+
cobra.OnInitialize(initConfig)
32+
33+
cmd.AddCommand(VersionCmd())
34+
35+
cmd.Flags().StringSlice("redactors", []string{}, "names of the additional redactors to use")
36+
cmd.Flags().Bool("redact", true, "enable/disable default redactions")
37+
cmd.Flags().String("format", "json", "output format, one of json or raw.")
38+
cmd.Flags().String("collector-image", "", "the full name of the collector image to use")
39+
cmd.Flags().String("collector-pull-policy", "", "the pull policy of the collector image")
40+
cmd.Flags().String("selector", "", "selector (label query) to filter remote collection nodes on.")
41+
cmd.Flags().Bool("collect-without-permissions", false, "always generate a support bundle, even if it some require additional permissions")
42+
43+
// hidden in favor of the `insecure-skip-tls-verify` flag
44+
cmd.Flags().Bool("allow-insecure-connections", false, "when set, do not verify TLS certs when retrieving spec and reporting results")
45+
cmd.Flags().MarkHidden("allow-insecure-connections")
46+
47+
viper.BindPFlags(cmd.Flags())
48+
49+
viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
50+
51+
k8sutil.AddFlags(cmd.Flags())
52+
53+
return cmd
54+
}
55+
56+
func InitAndExecute() {
57+
if err := RootCmd().Execute(); err != nil {
58+
os.Exit(1)
59+
}
60+
}
61+
62+
func initConfig() {
63+
viper.SetEnvPrefix("TROUBLESHOOT")
64+
viper.AutomaticEnv()
65+
}

cmd/collect/cli/run.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package cli
2+
3+
import (
4+
"fmt"
5+
"io/ioutil"
6+
"net/http"
7+
"os"
8+
"os/signal"
9+
"strings"
10+
"time"
11+
12+
"github.com/pkg/errors"
13+
"github.com/replicatedhq/troubleshoot/cmd/util"
14+
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
15+
"github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
16+
troubleshootclientsetscheme "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
17+
"github.com/replicatedhq/troubleshoot/pkg/collect"
18+
"github.com/replicatedhq/troubleshoot/pkg/docrewrite"
19+
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
20+
"github.com/replicatedhq/troubleshoot/pkg/specs"
21+
"github.com/replicatedhq/troubleshoot/pkg/supportbundle"
22+
"github.com/spf13/viper"
23+
"k8s.io/apimachinery/pkg/labels"
24+
)
25+
26+
// defaultTimeout is the collector timeout applied when the "request-timeout"
// setting is zero.
const defaultTimeout = 30 * time.Second
29+
30+
func runCollect(v *viper.Viper, arg string) error {
31+
go func() {
32+
signalChan := make(chan os.Signal, 1)
33+
signal.Notify(signalChan, os.Interrupt)
34+
<-signalChan
35+
os.Exit(0)
36+
}()
37+
38+
var collectorContent []byte
39+
var err error
40+
if strings.HasPrefix(arg, "secret/") {
41+
// format secret/namespace-name/secret-name
42+
pathParts := strings.Split(arg, "/")
43+
if len(pathParts) != 3 {
44+
return errors.Errorf("path %s must have 3 components", arg)
45+
}
46+
47+
spec, err := specs.LoadFromSecret(pathParts[1], pathParts[2], "collect-spec")
48+
if err != nil {
49+
return errors.Wrap(err, "failed to get spec from secret")
50+
}
51+
52+
collectorContent = spec
53+
} else if _, err = os.Stat(arg); err == nil {
54+
b, err := ioutil.ReadFile(arg)
55+
if err != nil {
56+
return err
57+
}
58+
59+
collectorContent = b
60+
} else {
61+
if !util.IsURL(arg) {
62+
return fmt.Errorf("%s is not a URL and was not found (err %s)", arg, err)
63+
}
64+
65+
req, err := http.NewRequest("GET", arg, nil)
66+
if err != nil {
67+
return err
68+
}
69+
req.Header.Set("User-Agent", "Replicated_Collect/v1beta2")
70+
resp, err := http.DefaultClient.Do(req)
71+
if err != nil {
72+
return err
73+
}
74+
defer resp.Body.Close()
75+
76+
body, err := ioutil.ReadAll(resp.Body)
77+
if err != nil {
78+
return err
79+
}
80+
81+
collectorContent = body
82+
}
83+
84+
collectorContent, err = docrewrite.ConvertToV1Beta2(collectorContent)
85+
if err != nil {
86+
return errors.Wrap(err, "failed to convert to v1beta2")
87+
}
88+
89+
multidocs := strings.Split(string(collectorContent), "\n---\n")
90+
91+
troubleshootclientsetscheme.AddToScheme(scheme.Scheme)
92+
decode := scheme.Codecs.UniversalDeserializer().Decode
93+
94+
additionalRedactors := &troubleshootv1beta2.Redactor{}
95+
for idx, redactor := range v.GetStringSlice("redactors") {
96+
redactorObj, err := supportbundle.GetRedactorFromURI(redactor)
97+
if err != nil {
98+
return errors.Wrapf(err, "failed to get redactor spec %s, #%d", redactor, idx)
99+
}
100+
101+
if redactorObj != nil {
102+
additionalRedactors.Spec.Redactors = append(additionalRedactors.Spec.Redactors, redactorObj.Spec.Redactors...)
103+
}
104+
}
105+
106+
for i, additionalDoc := range multidocs {
107+
if i == 0 {
108+
continue
109+
}
110+
additionalDoc, err := docrewrite.ConvertToV1Beta2([]byte(additionalDoc))
111+
if err != nil {
112+
return errors.Wrap(err, "failed to convert to v1beta2")
113+
}
114+
obj, _, err := decode(additionalDoc, nil, nil)
115+
if err != nil {
116+
return errors.Wrapf(err, "failed to parse additional doc %d", i)
117+
}
118+
multidocRedactors, ok := obj.(*troubleshootv1beta2.Redactor)
119+
if !ok {
120+
continue
121+
}
122+
additionalRedactors.Spec.Redactors = append(additionalRedactors.Spec.Redactors, multidocRedactors.Spec.Redactors...)
123+
}
124+
125+
// make sure we don't block any senders
126+
progressCh := make(chan interface{})
127+
defer close(progressCh)
128+
go func() {
129+
for range progressCh {
130+
}
131+
}()
132+
133+
restConfig, err := k8sutil.GetRESTConfig()
134+
if err != nil {
135+
return errors.Wrap(err, "failed to convert kube flags to rest config")
136+
}
137+
138+
labelSelector, err := labels.Parse(v.GetString("selector"))
139+
if err != nil {
140+
return errors.Wrap(err, "unable to parse selector")
141+
}
142+
143+
namespace := v.GetString("namespace")
144+
if namespace == "" {
145+
namespace = "default"
146+
}
147+
148+
timeout := v.GetDuration("request-timeout")
149+
if timeout == 0 {
150+
timeout = defaultTimeout
151+
}
152+
153+
createOpts := collect.CollectorRunOpts{
154+
CollectWithoutPermissions: v.GetBool("collect-without-permissions"),
155+
KubernetesRestConfig: restConfig,
156+
Image: v.GetString("collector-image"),
157+
PullPolicy: v.GetString("collector-pullpolicy"),
158+
LabelSelector: labelSelector.String(),
159+
Namespace: namespace,
160+
Timeout: timeout,
161+
ProgressChan: progressCh,
162+
}
163+
164+
// we only support HostCollector or RemoteCollector kinds.
165+
hostCollector, err := collect.ParseHostCollectorFromDoc([]byte(multidocs[0]))
166+
if err == nil {
167+
results, err := collect.CollectHost(hostCollector, additionalRedactors, createOpts)
168+
if err != nil {
169+
return errors.Wrap(err, "failed to collect from host")
170+
}
171+
return showHostStdoutResults(v.GetString("format"), hostCollector.Name, results)
172+
}
173+
174+
remoteCollector, err := collect.ParseRemoteCollectorFromDoc([]byte(multidocs[0]))
175+
if err == nil {
176+
results, err := collect.CollectRemote(remoteCollector, additionalRedactors, createOpts)
177+
if err != nil {
178+
return errors.Wrap(err, "failed to collect from remote host(s)")
179+
}
180+
return showRemoteStdoutResults(v.GetString("format"), remoteCollector.Name, results)
181+
}
182+
183+
return errors.New("failed to parse hostCollector or remoteCollector")
184+
}

cmd/collect/cli/stdout_results.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package cli
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
7+
"github.com/pkg/errors"
8+
"github.com/replicatedhq/troubleshoot/pkg/collect"
9+
)
10+
11+
// Output formats accepted by the show*StdoutResults functions.
const (
	// FormatJSON produces fully decoded JSON and is meant for people running
	// the CLI by hand.
	FormatJSON = "json"

	// FormatRaw is meant to be consumed by a remote collector: each
	// collector's output is emitted as a string of quoted JSON.
	FormatRaw = "raw"
)
19+
20+
func showHostStdoutResults(format string, collectName string, results *collect.HostCollectResult) error {
21+
switch format {
22+
case FormatJSON:
23+
return showHostStdoutResultsJSON(collectName, results.AllCollectedData)
24+
case FormatRaw:
25+
return showHostStdoutResultsRaw(collectName, results.AllCollectedData)
26+
default:
27+
return errors.Errorf("unknown output format: %q", format)
28+
}
29+
}
30+
31+
func showRemoteStdoutResults(format string, collectName string, results *collect.RemoteCollectResult) error {
32+
switch format {
33+
case FormatJSON:
34+
return showRemoteStdoutResultsJSON(collectName, results.AllCollectedData)
35+
case FormatRaw:
36+
return errors.Errorf("raw format not supported for remote collectors")
37+
default:
38+
return errors.Errorf("unknown output format: %q", format)
39+
}
40+
}
41+
42+
func showHostStdoutResultsJSON(collectName string, results map[string][]byte) error {
43+
output := make(map[string]interface{})
44+
for file, collectorResult := range results {
45+
var collectedItems map[string]interface{}
46+
if err := json.Unmarshal([]byte(collectorResult), &collectedItems); err != nil {
47+
return errors.Wrap(err, "failed to marshal collector results")
48+
}
49+
output[file] = collectedItems
50+
}
51+
52+
formatted, err := json.MarshalIndent(output, "", " ")
53+
if err != nil {
54+
return errors.Wrap(err, "failed to convert output to json")
55+
}
56+
57+
fmt.Print(string(formatted))
58+
return nil
59+
}
60+
61+
// showHostStdoutResultsRaw outputs the collector output as a string of quoted json.
62+
func showHostStdoutResultsRaw(collectName string, results map[string][]byte) error {
63+
strData := map[string]string{}
64+
for k, v := range results {
65+
strData[k] = string(v)
66+
}
67+
formatted, err := json.MarshalIndent(strData, "", " ")
68+
if err != nil {
69+
return errors.Wrap(err, "failed to convert output to json")
70+
}
71+
fmt.Print(string(formatted))
72+
return nil
73+
}
74+
75+
func showRemoteStdoutResultsJSON(collectName string, results map[string][]byte) error {
76+
type CollectorResult map[string]interface{}
77+
type NodeResult map[string]CollectorResult
78+
79+
var output = make(map[string]NodeResult)
80+
81+
for node, result := range results {
82+
var nodeResult map[string]string
83+
if err := json.Unmarshal(result, &nodeResult); err != nil {
84+
return errors.Wrap(err, "failed to marshal node results")
85+
}
86+
nr := make(NodeResult)
87+
for file, collectorResult := range nodeResult {
88+
var collectedItems map[string]interface{}
89+
if err := json.Unmarshal([]byte(collectorResult), &collectedItems); err != nil {
90+
return errors.Wrap(err, "failed to marshal collector results")
91+
}
92+
nr[file] = collectedItems
93+
}
94+
output[node] = nr
95+
}
96+
97+
formatted, err := json.MarshalIndent(output, "", " ")
98+
if err != nil {
99+
return errors.Wrap(err, "failed to convert output to json")
100+
}
101+
fmt.Print(string(formatted))
102+
return nil
103+
}

cmd/collect/cli/version.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package cli
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/replicatedhq/troubleshoot/pkg/version"
7+
"github.com/spf13/cobra"
8+
)
9+
10+
func VersionCmd() *cobra.Command {
11+
cmd := &cobra.Command{
12+
Use: "version",
13+
Short: "Print the current version and exit",
14+
Long: `Print the current version and exit`,
15+
RunE: func(cmd *cobra.Command, args []string) error {
16+
fmt.Printf("Replicated Collect %s\n", version.Version())
17+
18+
return nil
19+
},
20+
}
21+
return cmd
22+
}

cmd/collect/main.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package main
2+
3+
import (
4+
"github.com/replicatedhq/troubleshoot/cmd/collect/cli"
5+
_ "k8s.io/client-go/plugin/pkg/client/auth"
6+
)
7+
8+
func main() {
9+
cli.InitAndExecute()
10+
}

0 commit comments

Comments
 (0)