Skip to content

Commit 12f8bc3

Browse files
Improved error handling for support bundles and redactors on Windows (#1878)
* Improved error handling and Windows locking
* Delete all-windows-collectors.yaml
* Addressing bugbot concerns
* Update host_tcpportstatus.go
* Update redact.go
1 parent b4e3ed2 commit 12f8bc3

13 files changed

+355
-55
lines changed

examples/collect/host/all-collectors.yaml

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: troubleshoot.sh/v1beta2
22
kind: SupportBundle
33
metadata:
4-
name: all-collectors-test
4+
name: all-host-collectors
55
spec:
66
hostCollectors:
77
# System Info Collectors
@@ -46,25 +46,51 @@ spec:
4646
fileSize: 10Mi
4747
operationSizeBytes: 2300
4848

49-
# Certificate
49+
# Certificate Collectors
5050
- certificate:
5151
collectorName: test-cert
5252
certificatePath: /etc/ssl/certs/ca-certificates.crt
53+
- certificatesCollection:
54+
collectorName: certs-collection
55+
paths:
56+
- /etc/ssl/certs
5357

5458
# Network Tests
5559
- tcpPortStatus:
5660
collectorName: ssh-port
5761
port: 22
62+
- udpPortStatus:
63+
collectorName: dns-port
64+
port: 53
5865
- tcpConnect:
5966
collectorName: localhost-ssh
6067
address: 127.0.0.1:22
68+
- tcpLoadBalancer:
69+
collectorName: lb-test
70+
address: 127.0.0.1
71+
port: 80
72+
- httpLoadBalancer:
73+
collectorName: http-lb-test
74+
address: 127.0.0.1
75+
port: 80
76+
path: /healthz
6177
- http:
6278
collectorName: google
6379
get:
6480
url: https://www.google.com
6581
- dns:
6682
collectorName: dns-google
67-
hostname: google.com
83+
hostnames:
84+
- google.com
85+
- subnetAvailable:
86+
collectorName: subnet-check
87+
CIDRRangeAlloc: 10.0.0.0/16
88+
desiredCIDR: 24
89+
- networkNamespaceConnectivity:
90+
collectorName: netns-connectivity
91+
fromCIDR: 10.0.0.0/8
92+
toCIDR: 192.168.0.0/16
93+
port: 80
6894

6995
# Custom Commands
7096
- run:
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
apiVersion: troubleshoot.sh/v1beta2
2+
kind: SupportBundle
3+
metadata:
4+
name: all-kubernetes-collectors
5+
spec:
6+
collectors:
7+
# Cluster Info Collectors (2)
8+
- clusterInfo: {}
9+
- clusterResources: {}
10+
11+
# Metrics Collectors (2)
12+
- customMetrics:
13+
collectorName: custom-metrics
14+
metricRequests:
15+
- resourceMetricName: example-metric
16+
- nodeMetrics: {}
17+
18+
# ConfigMap and Secret Collectors (2)
19+
- configMap:
20+
collectorName: example-configmap
21+
name: example-configmap
22+
namespace: default
23+
includeValue: false
24+
- secret:
25+
collectorName: example-secret
26+
name: example-secret
27+
namespace: default
28+
includeValue: false
29+
30+
# Logs Collector (1)
31+
- logs:
32+
collectorName: example-logs
33+
selector:
34+
- app=example
35+
namespace: default
36+
limits:
37+
maxAge: 720h
38+
maxLines: 10000
39+
40+
# Pod Execution Collectors (4)
41+
- run:
42+
collectorName: run-example
43+
name: run-example
44+
namespace: default
45+
image: busybox:latest
46+
command: ["echo"]
47+
args: ["hello from run"]
48+
- runPod:
49+
collectorName: run-pod-example
50+
name: run-pod-example
51+
namespace: default
52+
podSpec:
53+
containers:
54+
- name: example
55+
image: busybox:latest
56+
command: ["echo", "hello from runPod"]
57+
- runDaemonSet:
58+
collectorName: run-daemonset-example
59+
name: run-daemonset-example
60+
namespace: default
61+
podSpec:
62+
containers:
63+
- name: example
64+
image: busybox:latest
65+
command: ["echo", "hello from runDaemonSet"]
66+
- exec:
67+
collectorName: exec-example
68+
name: exec-example
69+
selector:
70+
- app=example
71+
namespace: default
72+
command: ["echo"]
73+
args: ["hello from exec"]
74+
75+
# Data Collector (1)
76+
- data:
77+
collectorName: static-data
78+
name: static-data.txt
79+
data: "This is static data"
80+
81+
# Copy Collectors (2)
82+
- copy:
83+
collectorName: copy-example
84+
selector:
85+
- app=example
86+
namespace: default
87+
containerPath: /tmp
88+
- copyFromHost:
89+
collectorName: copy-from-host-example
90+
name: copy-from-host-example
91+
namespace: default
92+
image: busybox:latest
93+
hostPath: /tmp/example
94+
95+
# HTTP Collector (1)
96+
- http:
97+
collectorName: http-get-example
98+
get:
99+
url: https://www.google.com
100+
insecureSkipVerify: false
101+
102+
# Database Collectors (4)
103+
- postgres:
104+
collectorName: postgres-example
105+
uri: postgresql://user:password@localhost:5432/dbname
106+
- mysql:
107+
collectorName: mysql-example
108+
uri: user:password@tcp(localhost:3306)/dbname
109+
- mssql:
110+
collectorName: mssql-example
111+
uri: sqlserver://user:password@localhost:1433?database=dbname
112+
- redis:
113+
collectorName: redis-example
114+
uri: redis://localhost:6379
115+
116+
# Storage and System Collectors (3)
117+
- collectd:
118+
collectorName: collectd-example
119+
namespace: default
120+
image: busybox:latest
121+
hostPath: /var/lib/collectd
122+
- ceph:
123+
collectorName: ceph-example
124+
namespace: rook-ceph
125+
- longhorn:
126+
collectorName: longhorn-example
127+
namespace: longhorn-system
128+
129+
# Registry and Image Collector (1)
130+
- registryImages:
131+
collectorName: registry-images-example
132+
namespace: default
133+
images:
134+
- busybox:latest
135+
136+
# Sysctl Collector (1)
137+
- sysctl:
138+
collectorName: sysctl-example
139+
name: sysctl-example
140+
namespace: default
141+
image: busybox:latest
142+
143+
# Certificate Collector (1)
144+
- certificates:
145+
collectorName: certificates-example
146+
secrets:
147+
- name: tls-secret
148+
namespaces:
149+
- default
150+
151+
# Application-Specific Collectors (3)
152+
- helm:
153+
collectorName: helm-example
154+
namespace: default
155+
releaseName: example-release
156+
collectValues: false
157+
- goldpinger:
158+
collectorName: goldpinger-example
159+
namespace: default
160+
- sonobuoy:
161+
collectorName: sonobuoy-example
162+
namespace: sonobuoy
163+
164+
# DNS and Network Collectors (2)
165+
- dns:
166+
collectorName: dns-example
167+
timeout: 10s
168+
- etcd:
169+
collectorName: etcd-example
170+
image: quay.io/coreos/etcd:latest

pkg/collect/host_certificate.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ func (c *CollectHostCertificate) IsExcluded() (bool, error) {
3232

3333
func (c *CollectHostCertificate) Collect(progressChan chan<- interface{}) (map[string][]byte, error) {
3434
var result = KeyPairValid
35+
var collectorErr error
3536

3637
_, err := tls.LoadX509KeyPair(c.hostCollector.CertificatePath, c.hostCollector.KeyPath)
3738
if err != nil {
39+
collectorErr = err
3840
if strings.Contains(err.Error(), "no such file") {
3941
result = KeyPairMissing
4042
} else if strings.Contains(err.Error(), "PEM inputs may have been switched") {
@@ -67,7 +69,7 @@ func (c *CollectHostCertificate) Collect(progressChan chan<- interface{}) (map[s
6769

6870
return map[string][]byte{
6971
name: b,
70-
}, nil
72+
}, collectorErr
7173
}
7274

7375
func isEncryptedKey(filename string) (bool, error) {

pkg/collect/host_copy.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ func (c *CollectHostCopy) Collect(progressChan chan<- interface{}) (map[string][
4848
klog.Errorf("Failed to copy files from %q to %q: %v", c.hostCollector.Path, "<bundle>/"+bundleRelPath, err)
4949
fileName := fmt.Sprintf("%s/errors.json", c.relBundlePath(bundlePathDest))
5050
output := NewResult()
51-
err := output.SaveResult(c.BundlePath, fileName, marshalErrors([]string{err.Error()}))
52-
if err != nil {
53-
return nil, err
51+
saveErr := output.SaveResult(c.BundlePath, fileName, marshalErrors([]string{err.Error()}))
52+
if saveErr != nil {
53+
return nil, saveErr
5454
}
55-
return output, nil
55+
return output, err
5656
}
5757

5858
return result, nil

pkg/collect/host_httploadbalancer.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,15 @@ func (c *CollectHostHTTPLoadBalancer) Collect(progressChan chan<- interface{}) (
8080
}()
8181

8282
var networkStatus NetworkStatus
83+
var errorMessage string
84+
var collectorErr error
8385

8486
stopAfter := time.Now().Add(timeout)
8587
for {
8688
if len(listenErr) > 0 {
8789
err := <-listenErr
90+
errorMessage = err.Error()
91+
collectorErr = errors.Wrap(err, "failed to listen on HTTP port")
8892
if strings.Contains(err.Error(), "address already in use") {
8993
networkStatus = NetworkStatusAddressInUse
9094
break
@@ -113,7 +117,8 @@ func (c *CollectHostHTTPLoadBalancer) Collect(progressChan chan<- interface{}) (
113117
}
114118

115119
result := NetworkStatusResult{
116-
Status: networkStatus,
120+
Status: networkStatus,
121+
Message: errorMessage,
117122
}
118123

119124
b, err := json.Marshal(result)
@@ -132,7 +137,7 @@ func (c *CollectHostHTTPLoadBalancer) Collect(progressChan chan<- interface{}) (
132137

133138
return map[string][]byte{
134139
name: b,
135-
}, nil
140+
}, collectorErr
136141
}
137142

138143
func attemptPOST(address string, request []byte, response []byte) NetworkStatus {

pkg/collect/host_network.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package collect
22

33
import (
44
"bytes"
5+
"fmt"
56
"net"
67
"regexp"
78
"strconv"
@@ -70,19 +71,20 @@ func isValidLoadBalancerAddress(address string) bool {
7071
return len(errs) == 0
7172
}
7273

73-
func checkTCPConnection(progressChan chan<- interface{}, listenAddress string, dialAddress string, timeout time.Duration) (NetworkStatus, error) {
74+
func checkTCPConnection(progressChan chan<- interface{}, listenAddress string, dialAddress string, timeout time.Duration) (NetworkStatus, string, error) {
7475

7576
if !isValidLoadBalancerAddress(dialAddress) {
76-
return NetworkStatusInvalidAddress, errors.Errorf("Invalid Load Balancer Address: %v", dialAddress)
77+
errMsg := fmt.Sprintf("Invalid Load Balancer Address: %v", dialAddress)
78+
return NetworkStatusInvalidAddress, errMsg, errors.New(errMsg)
7779
}
7880

7981
lstn, err := net.Listen("tcp", listenAddress)
8082
if err != nil {
8183
if strings.Contains(err.Error(), "address already in use") {
82-
return NetworkStatusAddressInUse, nil
84+
return NetworkStatusAddressInUse, err.Error(), errors.Wrap(err, "failed to create listener")
8385
}
8486

85-
return NetworkStatusErrorOther, errors.Wrap(err, "failed to create listener")
87+
return NetworkStatusErrorOther, err.Error(), errors.Wrap(err, "failed to create listener")
8688
}
8789
defer lstn.Close()
8890

@@ -110,7 +112,8 @@ func checkTCPConnection(progressChan chan<- interface{}, listenAddress string, d
110112
if time.Now().After(stopAfter) {
111113
debug.Printf("Timeout")
112114

113-
return NetworkStatusConnectionTimeout, nil
115+
errMsg := "connection timeout"
116+
return NetworkStatusConnectionTimeout, errMsg, errors.New(errMsg)
114117
}
115118

116119
conn, err := net.DialTimeout("tcp", dialAddress, 50*time.Millisecond)
@@ -124,13 +127,13 @@ func checkTCPConnection(progressChan chan<- interface{}, listenAddress string, d
124127
continue
125128
}
126129
if strings.Contains(err.Error(), "connection refused") {
127-
return NetworkStatusConnectionRefused, nil
130+
return NetworkStatusConnectionRefused, err.Error(), errors.Wrap(err, "failed to dial")
128131
}
129-
return NetworkStatusErrorOther, errors.Wrap(err, "failed to dial")
132+
return NetworkStatusErrorOther, err.Error(), errors.Wrap(err, "failed to dial")
130133
}
131134

132135
if verifyConnectionToServer(conn, requestToken, responseToken) {
133-
return NetworkStatusConnected, nil
136+
return NetworkStatusConnected, "", nil
134137
}
135138

136139
progressChan <- errors.New("failed to verify connection to server")

0 commit comments

Comments
 (0)