Skip to content

Commit fbbcf87

Browse files
authored
feat(collectors): Store all pod logs in cluster-resources directory (#821)
* feat(collectors): Store all pod logs in cluster-resources directory All pod logs collected by the logs collector will now be stored in /cluster-resources/pods/logs/[namespace]/[pod]/[container].log. This will provide consistency and allow sbctl to find the logs when we run `kubectl logs <pod>`. To allow backwards compatibility, symlinks of the log files will be created in the current expected locations. Closes: #744
1 parent 6530cb3 commit fbbcf87

File tree

15 files changed

+318
-55
lines changed

15 files changed

+318
-55
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,6 @@ sbom/
3838

3939
# Ignore local pre-commit config
4040
.pre-commit-config.yaml
41+
42+
# Ignore generated support bundles
43+
*.tar.gz

examples/preflight/e2e.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ spec:
99
data: "5"
1010
- run:
1111
collectorName: "static-hi"
12-
image: 'alpine:3.5'
12+
image: 'alpine:3'
1313
command: ["echo", "hi static!"]
1414
analyzers:
1515
- clusterVersion:

examples/support-bundle/e2e.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ spec:
1111
data: "5"
1212
- runPod:
1313
collectorName: "static-hi"
14-
podSpec:
14+
podSpec:
1515
containers:
1616
- name: static-hi
17-
image: alpine:3.5
17+
image: alpine:3
1818
command: ["echo", "hi static!"]
1919
analyzers:
2020
- clusterVersion:

examples/support-bundle/sample-supportbundle.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@ spec:
1212
limits:
1313
maxAge: 720h # 30*24
1414
maxLines: 10000
15+
- logs:
16+
collectorName: all-logs
17+
name: all-logs
18+
- runPod:
19+
collectorName: "static-hi"
20+
podSpec:
21+
containers:
22+
- name: static-hi
23+
image: alpine:3
24+
command: ["echo", "hi static!"]
1525
analyzers:
1626
- clusterVersion:
1727
outcomes:
@@ -100,3 +110,12 @@ spec:
100110
message: The API deployment has only a single ready replica.
101111
- pass:
102112
message: There are multiple replicas of the API deployment ready.
113+
- textAnalyze:
114+
checkName: Said hi!
115+
fileName: /static-hi.log
116+
regex: 'hi static'
117+
outcomes:
118+
- fail:
119+
message: Didn't say hi.
120+
- pass:
121+
message: Said hi!

pkg/collect/cluster_resources.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func (c *CollectClusterResources) Collect(progressChan chan<- interface{}) (Coll
131131
limits := &troubleshootv1beta2.LogLimits{
132132
MaxLines: 500,
133133
}
134-
podLogs, err := savePodLogs(ctx, logsRoot, client, pod, "", container.Name, limits, false)
134+
podLogs, err := savePodLogs(ctx, logsRoot, client, &pod, "", container.Name, limits, false)
135135
if err != nil {
136136
errPath := filepath.Join("cluster-resources", "pods", "logs", pod.Namespace, pod.Name, fmt.Sprintf("%s-logs-errors.log", container.Name))
137137
output.SaveResult(c.BundlePath, errPath, bytes.NewBuffer([]byte(err.Error())))

pkg/collect/logs.go

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"io"
7+
"path/filepath"
78
"strings"
89
"time"
910

@@ -70,10 +71,7 @@ func (c *CollectLogs) Collect(progressChan chan<- interface{}) (CollectorResult,
7071
}
7172

7273
for _, containerName := range containerNames {
73-
if len(containerNames) == 1 {
74-
containerName = "" // if there was only one container, use the old behavior of not including the container name in the path
75-
}
76-
podLogs, err := savePodLogs(ctx, c.BundlePath, client, pod, c.Collector.Name, containerName, c.Collector.Limits, false)
74+
podLogs, err := savePodLogs(ctx, c.BundlePath, client, &pod, c.Collector.Name, containerName, c.Collector.Limits, false)
7775
if err != nil {
7876
key := fmt.Sprintf("%s/%s-errors.json", c.Collector.Name, pod.Name)
7977
if containerName != "" {
@@ -91,7 +89,7 @@ func (c *CollectLogs) Collect(progressChan chan<- interface{}) (CollectorResult,
9189
}
9290
} else {
9391
for _, container := range c.Collector.ContainerNames {
94-
containerLogs, err := savePodLogs(ctx, c.BundlePath, client, pod, c.Collector.Name, container, c.Collector.Limits, false)
92+
containerLogs, err := savePodLogs(ctx, c.BundlePath, client, &pod, c.Collector.Name, container, c.Collector.Limits, false)
9593
if err != nil {
9694
key := fmt.Sprintf("%s/%s/%s-errors.json", c.Collector.Name, pod.Name, container)
9795
err := output.SaveResult(c.BundlePath, key, marshalErrors([]string{err.Error()}))
@@ -111,7 +109,7 @@ func (c *CollectLogs) Collect(progressChan chan<- interface{}) (CollectorResult,
111109
return output, nil
112110
}
113111

114-
func listPodsInSelectors(ctx context.Context, client *kubernetes.Clientset, namespace string, selector []string) ([]corev1.Pod, []string) {
112+
func listPodsInSelectors(ctx context.Context, client kubernetes.Interface, namespace string, selector []string) ([]corev1.Pod, []string) {
115113
serializedLabelSelector := strings.Join(selector, ",")
116114

117115
listOptions := metav1.ListOptions{
@@ -126,20 +124,54 @@ func listPodsInSelectors(ctx context.Context, client *kubernetes.Clientset, name
126124
return pods.Items, nil
127125
}
128126

129-
func savePodLogs(ctx context.Context, bundlePath string, client *kubernetes.Clientset, pod corev1.Pod, name, container string, limits *troubleshootv1beta2.LogLimits, follow bool) (CollectorResult, error) {
127+
func savePodLogs(
128+
ctx context.Context,
129+
bundlePath string,
130+
client *kubernetes.Clientset,
131+
pod *corev1.Pod,
132+
collectorName, container string,
133+
limits *troubleshootv1beta2.LogLimits,
134+
follow bool,
135+
) (CollectorResult, error) {
136+
return savePodLogsWithInterface(ctx, bundlePath, client, pod, collectorName, container, limits, follow)
137+
}
138+
139+
func savePodLogsWithInterface(
140+
ctx context.Context,
141+
bundlePath string,
142+
client kubernetes.Interface,
143+
pod *corev1.Pod,
144+
collectorName, container string,
145+
limits *troubleshootv1beta2.LogLimits,
146+
follow bool,
147+
) (CollectorResult, error) {
130148
podLogOpts := corev1.PodLogOptions{
131149
Follow: follow,
132150
Container: container,
133151
}
134152

135-
setLogLimits(&podLogOpts, limits, convertMaxAgeToTime)
153+
result := NewResult()
136154

137-
fileKey := fmt.Sprintf("%s/%s", name, pod.Name)
155+
// TODO: Abstract away hard coded directory structure paths
156+
// Maybe create a FS provider or something similar
157+
filePathPrefix := filepath.Join(
158+
"cluster-resources", "pods", "logs", pod.Namespace, pod.Name, pod.Spec.Containers[0].Name,
159+
)
160+
161+
// TODO: If collectorName is empty, the path is stored with a leading slash
162+
// Retain this behavior otherwise analysers in the wild may break
163+
// Analysers that need to find a file in the root of the bundle should
164+
// prefix the path with a slash e.g /file.txt. This behavior should be
165+
// properly deprecated in the future.
166+
linkRelPathPrefix := fmt.Sprintf("%s/%s", collectorName, pod.Name)
138167
if container != "" {
139-
fileKey = fmt.Sprintf("%s/%s/%s", name, pod.Name, container)
168+
linkRelPathPrefix = fmt.Sprintf("%s/%s/%s", collectorName, pod.Name, container)
169+
filePathPrefix = filepath.Join(
170+
"cluster-resources", "pods", "logs", pod.Namespace, pod.Name, container,
171+
)
140172
}
141173

142-
result := NewResult()
174+
setLogLimits(&podLogOpts, limits, convertMaxAgeToTime)
143175

144176
req := client.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &podLogOpts)
145177
podLogs, err := req.Stream(ctx)
@@ -148,11 +180,13 @@ func savePodLogs(ctx context.Context, bundlePath string, client *kubernetes.Clie
148180
}
149181
defer podLogs.Close()
150182

151-
logWriter, err := result.GetWriter(bundlePath, fileKey+".log")
183+
logWriter, err := result.GetWriter(bundlePath, filePathPrefix+".log")
152184
if err != nil {
153185
return nil, errors.Wrap(err, "failed to get log writer")
154186
}
155-
defer result.CloseWriter(bundlePath, fileKey+".log", logWriter)
187+
// NOTE: deferred calls are executed in LIFO order i.e called in reverse order
188+
defer result.SymLinkResult(bundlePath, linkRelPathPrefix+".log", filePathPrefix+".log")
189+
defer result.CloseWriter(bundlePath, filePathPrefix+".log", logWriter)
156190

157191
_, err = io.Copy(logWriter, podLogs)
158192
if err != nil {
@@ -168,11 +202,13 @@ func savePodLogs(ctx context.Context, bundlePath string, client *kubernetes.Clie
168202
}
169203
defer podLogs.Close()
170204

171-
prevLogWriter, err := result.GetWriter(bundlePath, fileKey+"-previous.log")
205+
prevLogWriter, err := result.GetWriter(bundlePath, filePathPrefix+"-previous.log")
172206
if err != nil {
173207
return nil, errors.Wrap(err, "failed to get previous log writer")
174208
}
175-
defer result.CloseWriter(bundlePath, fileKey+"-previous.log", logWriter)
209+
// NOTE: deferred calls are executed in LIFO order i.e called in reverse order
210+
defer result.SymLinkResult(bundlePath, linkRelPathPrefix+"-previous.log", filePathPrefix+"-previous.log")
211+
defer result.CloseWriter(bundlePath, filePathPrefix+"-previous.log", logWriter)
176212

177213
_, err = io.Copy(prevLogWriter, podLogs)
178214
if err != nil {

pkg/collect/logs_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package collect
22

33
import (
4+
"context"
45
"testing"
56
"time"
67

@@ -9,6 +10,7 @@ import (
910
"github.com/stretchr/testify/require"
1011
corev1 "k8s.io/api/core/v1"
1112
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
testclient "k8s.io/client-go/kubernetes/fake"
1214
)
1315

1416
func Test_setLogLimits(t *testing.T) {
@@ -77,3 +79,74 @@ func Test_setLogLimits(t *testing.T) {
7779
})
7880
}
7981
}
82+
83+
func Test_savePodLogs(t *testing.T) {
84+
tests := []struct {
85+
name string
86+
withContainerName bool
87+
collectorName string
88+
want CollectorResult
89+
}{
90+
{
91+
name: "with container name",
92+
withContainerName: true,
93+
collectorName: "all-logs",
94+
want: CollectorResult{
95+
"all-logs/test-pod/nginx.log": []byte("fake logs"),
96+
"all-logs/test-pod/nginx-previous.log": []byte("fake logs"),
97+
"cluster-resources/pods/logs/my-namespace/test-pod/nginx.log": []byte("fake logs"),
98+
"cluster-resources/pods/logs/my-namespace/test-pod/nginx-previous.log": []byte("fake logs"),
99+
},
100+
},
101+
{
102+
name: "without container name",
103+
withContainerName: false,
104+
collectorName: "all-logs",
105+
want: CollectorResult{
106+
"all-logs/test-pod.log": []byte("fake logs"),
107+
"all-logs/test-pod-previous.log": []byte("fake logs"),
108+
"cluster-resources/pods/logs/my-namespace/test-pod/nginx.log": []byte("fake logs"),
109+
"cluster-resources/pods/logs/my-namespace/test-pod/nginx-previous.log": []byte("fake logs"),
110+
},
111+
},
112+
{
113+
name: "without container or collector names",
114+
withContainerName: false,
115+
want: CollectorResult{
116+
"/test-pod.log": []byte("fake logs"),
117+
"/test-pod-previous.log": []byte("fake logs"),
118+
"cluster-resources/pods/logs/my-namespace/test-pod/nginx.log": []byte("fake logs"),
119+
"cluster-resources/pods/logs/my-namespace/test-pod/nginx-previous.log": []byte("fake logs"),
120+
},
121+
},
122+
}
123+
for _, tt := range tests {
124+
t.Run(tt.name, func(t *testing.T) {
125+
ctx := context.TODO()
126+
containerName := "nginx"
127+
client := testclient.NewSimpleClientset()
128+
limits := &troubleshootv1beta2.LogLimits{
129+
MaxLines: 500,
130+
}
131+
pod, err := client.CoreV1().Pods("my-namespace").Create(ctx, &corev1.Pod{
132+
ObjectMeta: metav1.ObjectMeta{
133+
Name: "test-pod",
134+
},
135+
Spec: corev1.PodSpec{
136+
Containers: []corev1.Container{
137+
{
138+
Name: containerName,
139+
},
140+
},
141+
},
142+
}, metav1.CreateOptions{})
143+
assert.NoError(t, err)
144+
if !tt.withContainerName {
145+
containerName = ""
146+
}
147+
got, err := savePodLogsWithInterface(ctx, "", client, pod, tt.collectorName, containerName, limits, false)
148+
assert.NoError(t, err)
149+
assert.Equal(t, tt.want, got)
150+
})
151+
}
152+
}

pkg/collect/redact.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,28 @@ import (
1717

1818
func RedactResult(bundlePath string, input CollectorResult, additionalRedactors []*troubleshootv1beta2.Redact) error {
1919
for k, v := range input {
20+
file := k
21+
2022
var reader io.Reader
2123
if v == nil {
22-
r, err := input.GetReader(bundlePath, k)
24+
// Collected contents are in a file. Get a reader to the file.
25+
info, err := os.Lstat(filepath.Join(bundlePath, file))
26+
if err != nil {
27+
if os.IsNotExist(errors.Cause(err)) {
28+
// File not found, moving on.
29+
continue
30+
}
31+
return errors.Wrap(err, "failed to stat file")
32+
}
33+
34+
// Redact the target file of a symlink
35+
if info.Mode().Type() == os.ModeSymlink {
36+
file, err = os.Readlink(filepath.Join(bundlePath, file))
37+
if err != nil {
38+
return errors.Wrap(err, "failed to read symlink")
39+
}
40+
}
41+
r, err := input.GetReader(bundlePath, file)
2342
if err != nil {
2443
if os.IsNotExist(errors.Cause(err)) {
2544
continue
@@ -30,19 +49,20 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors
3049

3150
reader = r
3251
} else {
52+
// Collected contents are in memory. Get a reader to the memory buffer.
3353
reader = bytes.NewBuffer(v)
3454
}
3555

3656
//If the file is .tar, .tgz or .tar.gz, it must not be redacted. Instead it is decompressed and each file inside the
3757
//tar is decompressed, redacted and compressed back into the tar.
38-
if filepath.Ext(k) == ".tar" || filepath.Ext(k) == ".tgz" || strings.HasSuffix(k, ".tar.gz") {
58+
if filepath.Ext(file) == ".tar" || filepath.Ext(file) == ".tgz" || strings.HasSuffix(file, ".tar.gz") {
3959
tmpDir, err := ioutil.TempDir("", "troubleshoot-subresult-")
4060
if err != nil {
4161
return errors.Wrap(err, "failed to create temp dir")
4262
}
4363
defer os.RemoveAll(tmpDir)
4464

45-
subResult, tarHeaders, err := decompressFile(tmpDir, reader, k)
65+
subResult, tarHeaders, err := decompressFile(tmpDir, reader, file)
4666
if err != nil {
4767
return errors.Wrap(err, "failed to decompress file")
4868
}
@@ -51,7 +71,7 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors
5171
return errors.Wrap(err, "failed to redact file")
5272
}
5373

54-
dstFilename := filepath.Join(bundlePath, k)
74+
dstFilename := filepath.Join(bundlePath, file)
5575
err = compressFiles(tmpDir, subResult, tarHeaders, dstFilename)
5676
if err != nil {
5777
return errors.Wrap(err, "failed to re-compress file")
@@ -63,12 +83,12 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors
6383
continue
6484
}
6585

66-
redacted, err := redact.Redact(reader, k, additionalRedactors)
86+
redacted, err := redact.Redact(reader, file, additionalRedactors)
6787
if err != nil {
68-
return errors.Wrap(err, "failed to redact")
88+
return errors.Wrap(err, "failed to redact io stream")
6989
}
7090

71-
err = input.ReplaceResult(bundlePath, k, redacted)
91+
err = input.ReplaceResult(bundlePath, file, redacted)
7292
if err != nil {
7393
return errors.Wrap(err, "failed to create redacted result")
7494
}

pkg/collect/remote_collector.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ func (c *RemoteCollector) RunCollectorSync(globalRedactors []*troubleshootv1beta
109109

110110
if err = RedactResult("", result, globalRedactors); err != nil {
111111
// Returning result on error to be consistent with local collector.
112-
return result, errors.Wrap(err, "failed to redact")
112+
return result, errors.Wrap(err, "failed to redact remote collector results")
113113
}
114114
return result, nil
115115
}

0 commit comments

Comments
 (0)